wpconv 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +103 -0
- data/Rakefile +7 -0
- data/bin/wpconv +12 -0
- data/lib/wpconv.rb +6 -0
- data/lib/wpconv/cli.rb +15 -0
- data/lib/wpconv/converter.rb +133 -0
- data/lib/wpconv/filter/markdown.rb +79 -0
- data/lib/wpconv/filter/none.rb +9 -0
- data/lib/wpconv/version.rb +3 -0
- data/lib/wpconv/wp_xml/channel.rb +57 -0
- data/lib/wpconv/wp_xml/item.rb +41 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/wpconv/converter_spec.rb +8 -0
- data/spec/wpconv/filter/markdown_spec.rb +150 -0
- data/spec/wpconv_spec.rb +5 -0
- data/template/markdown.erb +9 -0
- data/wpconv.gemspec +27 -0
- metadata +154 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2684b45959f0ac30ea37d84efdc9fbf89bb23ee0
|
4
|
+
data.tar.gz: dec75301a95d5e1d3363562e176924f8da6368c4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3cb60ddc4ed0257d241f7e1de61ba4ef5d083dc81fda0077144eab093bc4c1721f0d48f3c63046165d39eacd0ccad2bb6f8353edc8cff37d674d07b780094e8e
|
7
|
+
data.tar.gz: 9834b73a132f01730f8187d517da78eb72550cdb9eb32dc84e7a4da552a7ad76e574e63c05913b0388700e351f91bcfc4e06e639de3e92bec17537127574a422
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 akahige
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
# Wpconv
|
2
|
+
|
3
|
+
Converts WordPress export XML to Markdown (or other formats).
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
$ gem install wpconv
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
wpconv convert WP_XML_PATH
|
12
|
+
|
13
|
+
Options:
|
14
|
+
-o, [--output-dir=/path/to/output_dir]
|
15
|
+
-t, [--template=/path/to/your_template.erb]
|
16
|
+
-n, [--filename-format=date-name(default), name or id]
|
17
|
+
-f, [--filter=markdown(default), none or /path/to/your_filter.rb]
|
18
|
+
|
19
|
+
example:
|
20
|
+
|
21
|
+
$ wpconv convert wordpress.2014-08-21.xml -o /tmp -n id
|
22
|
+
|
23
|
+
This example creates Markdown files from Wordpress export XML.
|
24
|
+
The output directory is /tmp and the output filenames are based on Wordpress post_id in this case.
|
25
|
+
|
26
|
+
`-o` specifies the output directory.
|
27
|
+
|
28
|
+
`-n` specifies the format of the output filenames.
|
29
|
+
|
30
|
+
`-t` and `-f` are advanced options to customize output. If you would like to use these options, you should write an erb template or some ruby code.
|
31
|
+
See templating and filter sections for more details.
|
32
|
+
|
33
|
+
## Templating
|
34
|
+
|
35
|
+
You can create a custom erb template to adjust outputs as you like.
|
36
|
+
|
37
|
+
The template variables `@item` and `@channel` are available.
|
38
|
+
These are Hash objects including wordpress items and channel data.
|
39
|
+
|
40
|
+
Specify `-t` option if you would like to use your template.
|
41
|
+
|
42
|
+
$ wpconv convert wordpress.2014-08-21.xml -o /tmp -t my_markdown.erb
|
43
|
+
|
44
|
+
This is the default template for your information.
|
45
|
+
|
46
|
+
---
|
47
|
+
title: <%= @item[:title] %>
|
48
|
+
date: <%= @item[:post_date] %>
|
49
|
+
layout: <%= @item[:post_type] %>
|
50
|
+
categories: [<%= @item[:categories].join(',') %>]
|
51
|
+
tags: [<%= @item[:tags].join(',') %>]
|
52
|
+
---
|
53
|
+
|
54
|
+
<%= @item[:content] %>
|
55
|
+
|
56
|
+
## Filter
|
57
|
+
|
58
|
+
You can use a custom filter for more complicated conversion logic.
|
59
|
+
A filter affects @item[:content].
|
60
|
+
|
61
|
+
You should write some ruby code for creating a custom filter.
|
62
|
+
|
63
|
+
The example below is a built in filter 'none'.
|
64
|
+
All filter classes should be under `Wpconv::Filter` module. And the class name should be the camelized file name.
|
65
|
+
|
66
|
+
module Wpconv
|
67
|
+
module Filter
|
68
|
+
class None
|
69
|
+
def self.apply(source_content)
|
70
|
+
source_content
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
Another example, creating 'my_filter'.
|
77
|
+
|
78
|
+
module Wpconv
|
79
|
+
module Filter
|
80
|
+
class MyFilter
|
81
|
+
def self.apply(source_content)
|
82
|
+
source_content.tap do |content|
|
83
|
+
content.gsub!(/foo/, 'bar')
|
84
|
+
|
85
|
+
# write the filter logic here...
|
86
|
+
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
Specify `-f` option if you would like to use your filter.
|
94
|
+
|
95
|
+
$ wpconv convert wordpress.2014-08-21.xml -o /tmp -f my_filter.rb
|
96
|
+
|
97
|
+
## Contributing
|
98
|
+
|
99
|
+
1. Fork it ( https://github.com/akahigeg/wpconv/fork )
|
100
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
101
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
102
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
103
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/bin/wpconv
ADDED
data/lib/wpconv.rb
ADDED
data/lib/wpconv/cli.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'thor'
|
2
|
+
require 'readline'
|
3
|
+
|
4
|
+
module Wpconv
  # Command-line front end built on Thor.
  #
  # Exposes a single `convert` command; the options declared below are handed
  # to Wpconv::Converter#run unchanged.
  class CLI < Thor
    desc 'convert WP_XML_PATH', 'convert wordpress export xml to markdown.'
    option :output_dir,      type: :string, aliases: '-o', banner: '/path/to/output_dir'
    option :template,        type: :string, aliases: '-t', banner: '/path/to/your_template.erb'
    option :filename_format, type: :string, aliases: '-n', banner: 'date-name(default), name or id'
    option :filter,          type: :string, aliases: '-f', banner: 'markdown(default), none or /path/to/your_filter.rb'
    # Runs the conversion for the export file at +wp_xml_path+.
    #
    # `options` is supplied by Thor from the declarations above
    # (presumably only the flags actually given on the command line -- the
    # converter falls back to its own defaults for anything missing).
    def convert(wp_xml_path)
      converter = Wpconv::Converter.new
      converter.run(wp_xml_path, options)
    end
  end
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'erb'
|
3
|
+
require 'active_support/all'
|
4
|
+
|
5
|
+
require 'wpconv/filter/markdown'
|
6
|
+
require 'wpconv/filter/none'
|
7
|
+
|
8
|
+
require 'wpconv/wp_xml/channel'
|
9
|
+
require 'wpconv/wp_xml/item'
|
10
|
+
|
11
|
+
module Wpconv
  # Converts a WordPress export XML file into one rendered text file per item.
  #
  # Every <item> is parsed with WpXML::Item, its :content is passed through the
  # selected filter class, and the ERB template is rendered with @item and
  # @channel in scope. Files are written beneath <output_dir>/posts,
  # <output_dir>/pages or <output_dir>/others depending on the item's post_type.
  class Converter
    # Values used when the corresponding option is not passed to #run.
    DEFAULT_OPTIONS = {
      output_dir: Dir.pwd,
      template: File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'template', 'markdown.erb')),
      filename_format: 'date-name',
      filter: 'markdown'
    }.freeze

    # Filters shipped with the gem; they need no extra `require`.
    BUILT_IN_FILTERS = ['markdown', 'none'].freeze

    # Runs the whole conversion.
    #
    # @param wp_xml_path [String] path of the WordPress export XML file
    # @param options [Hash] optional :template, :output_dir, :filename_format
    #   and :filter entries; missing entries fall back to DEFAULT_OPTIONS
    # @raise [LoadError] when a custom filter file cannot be required
    # @raise [NameError] when the filter file does not define the expected
    #   class under Wpconv::Filter
    def run(wp_xml_path, options = {})
      @wp_xml_path = wp_xml_path

      @template = options[:template] || DEFAULT_OPTIONS[:template]
      erb = ERB.new(File.read(@template))

      @output_base_dir = options[:output_dir] || DEFAULT_OPTIONS[:output_dir]
      setup_output_dirs

      @filename_format = options[:filename_format] || DEFAULT_OPTIONS[:filename_format]

      @filter = options[:filter] || DEFAULT_OPTIONS[:filter]

      print "converting...\n"
      print_convert_settings

      # Resolve the filter once, up front, instead of once per item.
      filter_class = load_filter_class

      # File.read closes the export file as soon as its content is in memory.
      doc = ::Nokogiri::XML(File.read(@wp_xml_path))
      @channel = WpXML::Channel.parse(doc.at('channel'))

      @convert_counts = {page: 0, post: 0, other: 0}

      doc.search('item').each do |doc_item|
        @item = WpXML::Item.parse(doc_item)
        @item[:content] = filter_class.apply(@item[:content])

        # Render before opening the target so that a template error does not
        # leave an empty file behind. The template reads @item and @channel
        # through this binding.
        converted = erb.result(binding)
        File.open(File.join(item_output_dir, item_filename), "w") { |f| f.write(converted) }

        increase_convert_count

        print "."
      end

      print "done.\n"
      print "#{@convert_counts[:page]} pages, #{@convert_counts[:post]} posts and #{@convert_counts[:other]} something items are converted.\n"
    end

    # Creates the pages/posts/others directories under @output_base_dir and
    # remembers their paths in @output_dirs.
    def setup_output_dirs
      @output_dirs = {
        page: File.join(@output_base_dir, 'pages'),
        post: File.join(@output_base_dir, 'posts'),
        other: File.join(@output_base_dir, 'others')
      }
      @output_dirs.each_value { |output_dir| FileUtils.mkdir_p(output_dir) }
    end

    # Adds one to the counter matching the current item's post type.
    def increase_convert_count
      @convert_counts[item_kind] += 1
    end

    # Builds the output filename for the current item according to
    # @filename_format ('date-name', 'name', or anything else for the post id).
    # An item without a post name is named after its post id instead.
    def item_filename
      post_name = @item[:post_name].to_s.empty? ? @item[:post_id] : @item[:post_name]
      case @filename_format
      when 'date-name'
        # post_date looks like "YYYY-MM-DD HH:MM:SS"; only the date part is used.
        "#{@item[:post_date].to_s.split(' ').first}-#{post_name}.md"
      when 'name'
        "#{post_name}.md"
      else
        # 'id' and any unrecognised format both fall back to the post id.
        "#{@item[:post_id]}.md"
      end
    end

    # Returns the directory the current item is written to.
    def item_output_dir
      @output_dirs[item_kind]
    end

    # Prints the effective settings of this run to standard output.
    def print_convert_settings
      print " source: #{@wp_xml_path}\n"
      print " template: #{@template}\n"
      print " output_dir: #{@output_base_dir}\n"
      print " filename_format: #{@filename_format}\n"
      print " filter: #{@filter}\n"
    end

    # Path of the bundled Markdown template.
    def default_template
      DEFAULT_OPTIONS[:template]
    end

    # Filename format used when none is requested.
    def default_filename_format
      DEFAULT_OPTIONS[:filename_format]
    end

    # Filter name used when none is requested.
    def default_filter
      DEFAULT_OPTIONS[:filter]
    end

    private

    # Maps the current item's post_type to :post, :page or :other.
    # A missing post_type counts as :other.
    def item_kind
      kind = @item[:post_type].to_s
      %w(post page).include?(kind) ? kind.to_sym : :other
    end

    # Requires a custom filter file when needed and returns the filter class.
    #
    # A filter that exists relative to the working directory is required by
    # absolute path, so "my_filter.rb" and "filters/my_filter.rb" both work;
    # anything else is handed to `require` as given. The class name is the
    # camelized file name and must live directly under Wpconv::Filter.
    def load_filter_class
      unless BUILT_IN_FILTERS.include?(@filter)
        local_path = File.expand_path(@filter)
        found_locally = [local_path, "#{local_path}.rb"].any? { |path| File.file?(path) }
        require(found_locally ? local_path : @filter)
      end

      filter_class_name = File.basename(@filter).sub(/\.rb\z/, '').camelize
      Filter.const_get(filter_class_name, false)
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Wpconv
  module Filter
    # Built-in filter that rewrites a subset of HTML markup as Markdown.
    #
    # see. http://daringfireball.net/projects/markdown/syntax
    #
    # The source string is modified in place and also returned.
    class Markdown
      # Characters that get a backslash prefix so they are not read as
      # Markdown syntax. The backslash itself comes first so that backslashes
      # added for the later characters are not escaped a second time.
      BackslashEscapedCharacters = %w(\\ ` * _ { } [ ] \( \) # + - . !).freeze

      # Heading tag => Markdown heading prefix.
      HEADING_MARKS = {
        'h1' => '#',
        'h2' => '##',
        'h3' => '###',
        'h4' => '####',
        'h5' => '#####',
        'h6' => '######'
      }.freeze

      # Escapes Markdown special characters, then converts the supported tags.
      #
      # @param source_content [String] HTML content of one item (mutated)
      # @return [String] the same string object, converted
      def self.apply(source_content)
        escaped_source_content = escape_literal(source_content)
        convert_html_tags(escaped_source_content)
      end

      # Prefixes every character listed in BackslashEscapedCharacters with a
      # backslash. Mutates and returns +source_content+.
      def self.escape_literal(source_content)
        source_content.tap do |content|
          BackslashEscapedCharacters.each do |char|
            content.gsub!(char) { "\\#{char}" }
          end
        end
      end

      # Replaces the supported HTML tags with Markdown, in a fixed order.
      # Mutates and returns +escaped_source_content+. The order matters:
      # <pre><code> blocks must be handled before bare <code> spans.
      def self.convert_html_tags(escaped_source_content)
        escaped_source_content.tap do |content|
          # Headings; line feeds inside the heading text are removed.
          HEADING_MARKS.each do |tag, md|
            content.gsub!(/<#{tag}>(.+?)<\/#{tag}>/m) { "#{md} #{$1.gsub(/\n/, '')}" }
          end

          # Lists: the <ul>/<ol> wrappers are dropped together with one
          # following line feed. Definition lists are not supported.
          %w(ul ol).each do |tag|
            content.gsub!(/<\/?#{tag}>[\n]?/, '')
          end
          # Every <li> becomes a bullet; numbered lists are not supported.
          content.gsub!(/[ \t]*?<li.*?>(.+?)<\/li>[ \t]*?/m) { "* #{$1.strip}" }

          # Horizontal rule
          content.gsub!(/<hr\s?\/?>/, "\n---\n")

          # <pre><code>: entities are decoded and every line is indented.
          # NOTE(review): Markdown code blocks need a four-space indent; the
          # width here is kept as found -- confirm against the packaged file.
          content.gsub!(/<pre.*?><code.*?>(.*?)<\/code><\/pre>/m) { "#{decode_markup_symbol($1).gsub(/^/, ' ')}" }

          # Inline <code>
          content.gsub!(/<code.*?>(.*?)<\/code>/m) { '`` ' + decode_markup_symbol($1) + ' ``' }

          # Blockquote: every line is prefixed with "> ".
          content.gsub!(/<blockquote>(.*?)<\/blockquote>/m) { "#{$1.gsub(/^/, '> ')}" }

          # Emphasis
          content.gsub!(/<em>(.*?)<\/em>/m) { "*#{$1}*" }

          # Strong emphasis (<strong> and <b>)
          ["strong", "b"].each do |tag|
            content.gsub!(/<#{tag}>(.*?)<\/#{tag}>/m) { "**#{$1}**" }
          end

          # Links
          content.gsub!(/<a.*?href=("|')(.+?)("|').*?>(.+?)<\/a>/m) { "[#{$4}](#{$2})" }

          # Images and tables are not converted.
        end
      end

      # Turns the HTML entities &lt;, &gt; and &amp; back into the literal
      # characters. &amp; is decoded last so that "&amp;lt;" yields "&lt;"
      # rather than "<".
      def self.decode_markup_symbol(code)
        code.gsub('&lt;', '<').gsub('&gt;', '>').gsub('&amp;', '&')
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Wpconv
  module WpXML
    # Extracts site-level data from the <channel> element of a WordPress export.
    class Channel
      # Child elements copied as-is (no namespace prefix).
      PLAIN_KEYS = %w(title link description pubDate language).freeze
      # Child elements looked up with the "wp|" namespace prefix.
      WP_KEYS = %w(wxr_version base_site_url base_blog_url).freeze

      # Result key => selector, for each kind of repeated record.
      AUTHOR_FIELDS = {
        id: 'wp|author_id',
        login: 'wp|author_login',
        email: 'wp|author_email',
        name: 'wp|author_display_name',
        first_name: 'wp|author_first_name',
        last_name: 'wp|author_last_name'
      }.freeze
      CATEGORY_FIELDS = {
        id: 'wp|term_id',
        name: 'wp|cat_name',
        nickname: 'wp|category_nicename',
        parent: 'wp|category_parent'
      }.freeze
      TAG_FIELDS = {
        id: 'wp|term_id',
        name: 'wp|tag_name',
        slug: 'wp|tag_slug'
      }.freeze

      # Builds a Hash describing the channel.
      #
      # Scalar keys (:title, :link, ... :base_blog_url) are present only when
      # the matching element exists. :authors, :categories and :tags are
      # always Arrays of Hashes; a field whose element is missing is nil
      # instead of raising NoMethodError.
      #
      # @param doc_channel [#at, #search] the <channel> node
      # @return [Hash]
      def self.parse(doc_channel)
        {}.tap do |channel|
          PLAIN_KEYS.each do |key|
            node = doc_channel.at(key)
            channel[key.to_sym] = node.text if node
          end

          WP_KEYS.each do |key|
            node = doc_channel.at("wp|#{key}")
            channel[key.to_sym] = node.text if node
          end

          channel[:authors] = records(doc_channel, 'wp|author', AUTHOR_FIELDS)
          channel[:categories] = records(doc_channel, 'wp|category', CATEGORY_FIELDS)
          channel[:tags] = records(doc_channel, 'wp|tag', TAG_FIELDS)
        end
      end

      # Returns one Hash per node matching +selector+ under +parent+, filled
      # with the text of each sub-element named in +fields+ (nil when absent).
      def self.records(parent, selector, fields)
        parent.search(selector).map do |node|
          fields.each_with_object({}) do |(name, field_selector), record|
            field = node.at(field_selector)
            record[name] = field && field.text
          end
        end
      end
      private_class_method :records
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Wpconv
  module WpXML
    # Extracts the data of one <item> element of a WordPress export.
    class Item
      # Child elements copied as-is (no namespace prefix).
      PLAIN_KEYS = %w(title link pubDate guid description).freeze
      # Child elements looked up with the "wp|" namespace prefix.
      WP_KEYS = %w(post_id post_date post_date_gmt comment_status ping_status post_name status post_parent menu_order post_type post_password is_sticky).freeze

      # Builds a Hash describing the item.
      #
      # Keys from PLAIN_KEYS and WP_KEYS are present only when the matching
      # element exists. :creator, :content and :excerpt are always Strings
      # (empty when the element is missing, instead of raising
      # NoMethodError). :categories and :tags are Arrays of names taken from
      # <category> elements by their "domain" attribute; :postmeta maps each
      # meta key (as a Symbol) to its value.
      #
      # @param doc_item [#at, #search] the <item> node
      # @return [Hash]
      def self.parse(doc_item)
        {}.tap do |item|
          PLAIN_KEYS.each do |key|
            node = doc_item.at(key)
            item[key.to_sym] = node.text if node
          end

          item[:creator] = text_or_empty(doc_item.at('dc|creator'))
          item[:content] = text_or_empty(doc_item.at('content|encoded'))
          item[:excerpt] = text_or_empty(doc_item.at('excerpt|encoded'))

          WP_KEYS.each do |key|
            node = doc_item.at("wp|#{key}")
            item[key.to_sym] = node.text if node
          end

          item[:categories] = []
          item[:tags] = []
          doc_item.search('category').each do |cat|
            # Other domains are ignored.
            case cat["domain"]
            when 'category'
              item[:categories].push cat.text
            when 'post_tag'
              item[:tags].push cat.text
            end
          end

          item[:postmeta] = {}
          doc_item.search('wp|postmeta').each do |meta|
            key_node = meta.at('wp|meta_key')
            next unless key_node # an entry without a key cannot be stored

            value_node = meta.at('wp|meta_value')
            item[:postmeta][key_node.text.to_sym] = value_node && value_node.text
          end
        end
      end

      # Text of +node+, or an empty String when the node is missing.
      def self.text_or_empty(node)
        node ? node.text : ''
      end
      private_class_method :text_or_empty
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
$TESTING=true
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'rspec'
|
7
|
+
require 'wpconv'
|
8
|
+
|
9
|
+
Dir[File.join(File.dirname(__FILE__), "..", "lib", "**/*.rb")].each do |f|
|
10
|
+
require f
|
11
|
+
end
|
12
|
+
|
13
|
+
RSpec.configure do |config|
|
14
|
+
config.color_enabled = true
|
15
|
+
config.filter_run_excluding :skip => true
|
16
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Wpconv::Filter::Markdown do
|
5
|
+
describe "converting HTML tags" do
|
6
|
+
describe "heading" do
|
7
|
+
it "h1" do
|
8
|
+
content = Wpconv::Filter::Markdown.apply("<h1>hoge</h1>")
|
9
|
+
expect(content).to eq "# hoge"
|
10
|
+
end
|
11
|
+
|
12
|
+
it "h6" do
|
13
|
+
content = Wpconv::Filter::Markdown.apply("<h6>hoge</h6>")
|
14
|
+
expect(content).to eq "###### hoge"
|
15
|
+
end
|
16
|
+
|
17
|
+
it "h3(multiline)" do
|
18
|
+
content = Wpconv::Filter::Markdown.apply("<h3>ho\nge</h3>")
|
19
|
+
expect(content).to eq "### hoge"
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "list" do
|
24
|
+
it "ul" do
|
25
|
+
content = Wpconv::Filter::Markdown.apply("<ul>")
|
26
|
+
expect(content).to eq ""
|
27
|
+
end
|
28
|
+
|
29
|
+
it "/ul before LF" do
|
30
|
+
content = Wpconv::Filter::Markdown.apply("</ul>\n")
|
31
|
+
expect(content).to eq ""
|
32
|
+
end
|
33
|
+
|
34
|
+
it "ul > li*" do
|
35
|
+
content = Wpconv::Filter::Markdown.apply("<ul><li>hoge</li>\n<li>fuga</li></ul>")
|
36
|
+
expect(content).to eq "* hoge\n* fuga"
|
37
|
+
end
|
38
|
+
|
39
|
+
it "ul > li(multiline)" do
|
40
|
+
content = Wpconv::Filter::Markdown.apply("<ul><li>ho\nge</li>")
|
41
|
+
expect(content).to eq "* ho\nge"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe "hr" do
|
46
|
+
it "hr" do
|
47
|
+
content = Wpconv::Filter::Markdown.apply("<hr>")
|
48
|
+
expect(content).to eq "\n---\n"
|
49
|
+
end
|
50
|
+
|
51
|
+
it "hr/" do
|
52
|
+
content = Wpconv::Filter::Markdown.apply("<hr/>")
|
53
|
+
expect(content).to eq "\n---\n"
|
54
|
+
end
|
55
|
+
|
56
|
+
it "hr /" do
|
57
|
+
content = Wpconv::Filter::Markdown.apply("<hr />")
|
58
|
+
expect(content).to eq "\n---\n"
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
describe "pre and code" do
|
63
|
+
it "pre and code" do
|
64
|
+
content = Wpconv::Filter::Markdown.apply("<pre><code>hoge\nfuga</code></pre>")
|
65
|
+
expect(content).to eq " hoge\n fuga"
|
66
|
+
end
|
67
|
+
|
68
|
+
it "pre and code with class" do
|
69
|
+
content = Wpconv::Filter::Markdown.apply("<pre><code class='text'>hoge\nfuga</code></pre>")
|
70
|
+
expect(content).to eq " hoge\n fuga"
|
71
|
+
end
|
72
|
+
|
73
|
+
it "pre and code < > &" do
|
74
|
+
content = Wpconv::Filter::Markdown.apply("<pre><code><hoge> & <fuga></code></pre>")
|
75
|
+
expect(content).to eq " <hoge> & <fuga>"
|
76
|
+
end
|
77
|
+
|
78
|
+
it "only pre" do
|
79
|
+
content = Wpconv::Filter::Markdown.apply("<pre class='text'>hoge\nfuga</pre>")
|
80
|
+
expect(content).to eq "<pre class='text'>hoge\nfuga</pre>"
|
81
|
+
end
|
82
|
+
|
83
|
+
it "code" do
|
84
|
+
content = Wpconv::Filter::Markdown.apply("a <code class='text'>hoge\nfuga</code> z")
|
85
|
+
expect(content).to eq "a `` hoge\nfuga `` z"
|
86
|
+
end
|
87
|
+
|
88
|
+
it "decode < > &" do
|
89
|
+
content = Wpconv::Filter::Markdown.apply("<code class='text'><hoge> & <fuga></code>")
|
90
|
+
expect(content).to eq "`` <hoge> & <fuga> ``"
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
describe "blockquote" do
|
95
|
+
it "blockquote" do
|
96
|
+
content = Wpconv::Filter::Markdown.apply("<blockquote>hoge\nfuga</blockquote>")
|
97
|
+
expect(content).to eq "> hoge\n> fuga"
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe "em/strong/b" do
|
102
|
+
it "em" do
|
103
|
+
content = Wpconv::Filter::Markdown.apply("<em>hoge</em>")
|
104
|
+
expect(content).to eq "*hoge*"
|
105
|
+
end
|
106
|
+
|
107
|
+
it "em(multiline)" do
|
108
|
+
content = Wpconv::Filter::Markdown.apply("<em>hoge\nfuga</em>")
|
109
|
+
expect(content).to eq "*hoge\nfuga*"
|
110
|
+
end
|
111
|
+
|
112
|
+
it "strong" do
|
113
|
+
content = Wpconv::Filter::Markdown.apply("<strong>hoge</strong>")
|
114
|
+
expect(content).to eq "**hoge**"
|
115
|
+
end
|
116
|
+
|
117
|
+
it "b" do
|
118
|
+
content = Wpconv::Filter::Markdown.apply("<b>hoge</b>")
|
119
|
+
expect(content).to eq "**hoge**"
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
describe "link" do
|
124
|
+
it "a" do
|
125
|
+
content = Wpconv::Filter::Markdown.apply("<a href='http://www.example.com/hoge/' target='_blank'>hoge</a>")
|
126
|
+
expect(content).to eq "[hoge](http://www\\.example\\.com/hoge/)"
|
127
|
+
end
|
128
|
+
|
129
|
+
it "LF in anchor text" do
|
130
|
+
content = Wpconv::Filter::Markdown.apply("<a href='http://www.example.com/hoge/' target='_blank'>ho\nge</a>")
|
131
|
+
expect(content).to eq "[ho\nge](http://www\\.example\\.com/hoge/)"
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
describe "escaping literal" do
|
137
|
+
# %w(\\ ` * _ { } [ ] \( \) # + - . !)
|
138
|
+
describe "for escaped character" do
|
139
|
+
it '*' do
|
140
|
+
content = Wpconv::Filter::Markdown.apply("h*ge")
|
141
|
+
expect(content).to eq 'h\*ge'
|
142
|
+
end
|
143
|
+
|
144
|
+
it '_' do
|
145
|
+
content = Wpconv::Filter::Markdown.apply("_hoge_")
|
146
|
+
expect(content).to eq '\_hoge\_'
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
data/spec/wpconv_spec.rb
ADDED
data/wpconv.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'wpconv/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "wpconv"
|
8
|
+
spec.version = Wpconv::VERSION
|
9
|
+
spec.authors = ["akahige"]
|
10
|
+
spec.email = ["akahigeg@gmail.com"]
|
11
|
+
spec.summary = %q{Converting Wordpress export XML to other format.}
|
12
|
+
spec.description = %q{Converting Wordpress export XML to other format.}
|
13
|
+
spec.homepage = "https://github.com/akahigeg/wpconv"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.6"
|
22
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
23
|
+
spec.add_development_dependency "rspec"
|
24
|
+
spec.add_dependency "thor"
|
25
|
+
spec.add_dependency "nokogiri"
|
26
|
+
spec.add_dependency "activesupport"
|
27
|
+
end
|
metadata
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wpconv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- akahige
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-08-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: thor
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: nokogiri
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Converting Wordpress export XML to other format.
|
98
|
+
email:
|
99
|
+
- akahigeg@gmail.com
|
100
|
+
executables:
|
101
|
+
- wpconv
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- .gitignore
|
106
|
+
- Gemfile
|
107
|
+
- LICENSE.txt
|
108
|
+
- README.md
|
109
|
+
- Rakefile
|
110
|
+
- bin/wpconv
|
111
|
+
- lib/wpconv.rb
|
112
|
+
- lib/wpconv/cli.rb
|
113
|
+
- lib/wpconv/converter.rb
|
114
|
+
- lib/wpconv/filter/markdown.rb
|
115
|
+
- lib/wpconv/filter/none.rb
|
116
|
+
- lib/wpconv/version.rb
|
117
|
+
- lib/wpconv/wp_xml/channel.rb
|
118
|
+
- lib/wpconv/wp_xml/item.rb
|
119
|
+
- spec/spec_helper.rb
|
120
|
+
- spec/wpconv/converter_spec.rb
|
121
|
+
- spec/wpconv/filter/markdown_spec.rb
|
122
|
+
- spec/wpconv_spec.rb
|
123
|
+
- template/markdown.erb
|
124
|
+
- wpconv.gemspec
|
125
|
+
homepage: https://github.com/akahigeg/wpconv
|
126
|
+
licenses:
|
127
|
+
- MIT
|
128
|
+
metadata: {}
|
129
|
+
post_install_message:
|
130
|
+
rdoc_options: []
|
131
|
+
require_paths:
|
132
|
+
- lib
|
133
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - '>='
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '0'
|
138
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
139
|
+
requirements:
|
140
|
+
- - '>='
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
version: '0'
|
143
|
+
requirements: []
|
144
|
+
rubyforge_project:
|
145
|
+
rubygems_version: 2.0.3
|
146
|
+
signing_key:
|
147
|
+
specification_version: 4
|
148
|
+
summary: Converting Wordpress export XML to other format.
|
149
|
+
test_files:
|
150
|
+
- spec/spec_helper.rb
|
151
|
+
- spec/wpconv/converter_spec.rb
|
152
|
+
- spec/wpconv/filter/markdown_spec.rb
|
153
|
+
- spec/wpconv_spec.rb
|
154
|
+
has_rdoc:
|