bunto-import 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35):
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -21
  3. data/README.markdown +33 -33
  4. data/lib/bunto-import.rb +49 -49
  5. data/lib/bunto-import/importer.rb +26 -26
  6. data/lib/bunto-import/importers.rb +10 -10
  7. data/lib/bunto-import/importers/behance.rb +80 -80
  8. data/lib/bunto-import/importers/blogger.rb +330 -264
  9. data/lib/bunto-import/importers/csv.rb +96 -96
  10. data/lib/bunto-import/importers/drupal6.rb +53 -139
  11. data/lib/bunto-import/importers/drupal7.rb +54 -111
  12. data/lib/bunto-import/importers/drupal_common.rb +157 -0
  13. data/lib/bunto-import/importers/easyblog.rb +96 -96
  14. data/lib/bunto-import/importers/enki.rb +74 -74
  15. data/lib/bunto-import/importers/ghost.rb +68 -68
  16. data/lib/bunto-import/importers/google_reader.rb +64 -64
  17. data/lib/bunto-import/importers/joomla.rb +92 -90
  18. data/lib/bunto-import/importers/joomla3.rb +91 -91
  19. data/lib/bunto-import/importers/jrnl.rb +125 -125
  20. data/lib/bunto-import/importers/marley.rb +72 -72
  21. data/lib/bunto-import/importers/mephisto.rb +99 -99
  22. data/lib/bunto-import/importers/mt.rb +257 -257
  23. data/lib/bunto-import/importers/posterous.rb +130 -130
  24. data/lib/bunto-import/importers/rss.rb +62 -62
  25. data/lib/bunto-import/importers/s9y.rb +60 -60
  26. data/lib/bunto-import/importers/s9y_database.rb +363 -0
  27. data/lib/bunto-import/importers/textpattern.rb +70 -70
  28. data/lib/bunto-import/importers/tumblr.rb +300 -289
  29. data/lib/bunto-import/importers/typo.rb +88 -88
  30. data/lib/bunto-import/importers/wordpress.rb +372 -372
  31. data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
  32. data/lib/bunto-import/util.rb +76 -76
  33. data/lib/bunto-import/version.rb +3 -3
  34. data/lib/bunto/commands/import.rb +79 -79
  35. metadata +84 -54
@@ -1,264 +1,330 @@
1
- module BuntoImport
2
- module Importers
3
- class Blogger < Importer
4
- def self.specify_options(c)
5
- c.option 'source', '--source NAME', 'The XML file (blog-MM-DD-YYYY.xml) path to import'
6
- c.option 'no-blogger-info', '--no-blogger-info', 'not to leave blogger-URL info (id and old URL) in the front matter (default: false)'
7
- c.option 'replace-internal-link', '--replace-internal-link', 'replace internal links using the post_url liquid tag. (default: false)'
8
- end
9
-
10
- def self.validate(options)
11
- if options['source'].nil?
12
- raise 'Missing mandatory option: --source'
13
- elsif not File.exist?(options['source'])
14
- raise Errno::ENOENT, "File not found: #{options['source']}"
15
- end
16
- end
17
-
18
- def self.require_deps
19
- BuntoImport.require_with_fallback(%w[
20
- rexml/document
21
- rexml/streamlistener
22
- rexml/parsers/streamparser
23
- uri
24
- time
25
- fileutils
26
- safe_yaml
27
- open-uri
28
- ])
29
- end
30
-
31
- # Process the import.
32
- #
33
- # source:: a local file String (or IO object for internal use purpose)..
34
- # no-blogger-info:: a boolean if not leave blogger info (id and original URL).
35
- # replace-internal-link:: a boolean if replace internal link
36
- #
37
- # Returns nothing.
38
- def self.process(options)
39
- source = options.fetch('source')
40
-
41
- listener = BloggerAtomStreamListener.new
42
-
43
- listener.leave_blogger_info = ! options.fetch('no-blogger-info', false),
44
-
45
- File.open(source, 'r') do |f|
46
- f.flock(File::LOCK_SH)
47
- REXML::Parsers::StreamParser.new(f, listener).parse()
48
- end
49
-
50
- options['original-url-base'] = listener.original_url_base
51
-
52
- postprocess(options)
53
- end
54
-
55
- # Post-process after import.
56
- #
57
- # replace-internal-link:: a boolean if replace internal link
58
- #
59
- # Returns nothing.
60
- def self.postprocess(options)
61
- # Replace internal link URL
62
- if options.fetch('replace-internal-link', false)
63
- original_url_base = options.fetch('original-url-base', nil)
64
- if original_url_base
65
- orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
66
-
67
- Dir.glob('_posts/*.*') do |filename|
68
- body = nil
69
- File.open(filename, 'r') do |f|
70
- f.flock(File::LOCK_SH)
71
- body = f.read
72
- end
73
-
74
- body.gsub!(orig_url_pattern) do
75
- # for post_url
76
- quote = $1
77
- post_file = Dir.glob("_posts/#{$2}-#{$3}-*-#{$4.to_s.tr('/', '-')}").first
78
- raise "Could not found: _posts/#{$2}-#{$3}-*-#{$4.to_s.tr('/', '-')}" if post_file.nil?
79
- " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, '.html')} %}#{quote}"
80
- end
81
-
82
- File.open(filename, 'w') do |f|
83
- f.flock(File::LOCK_EX)
84
- f << body
85
- end
86
- end
87
- end
88
- end
89
- end
90
-
91
- class BloggerAtomStreamListener
92
- def initialize
93
- # use `extend` instead of `include` to use `require_deps` instead of `require`.
94
- extend REXML::StreamListener
95
- extend BloggerAtomStreamListenerMethods
96
-
97
- @leave_blogger_info = true
98
- end
99
- end
100
-
101
- module BloggerAtomStreamListenerMethods
102
- attr_accessor :leave_blogger_info
103
- attr_reader :original_url_base
104
-
105
- def tag_start(tag, attrs)
106
- @tag_bread = [] unless @tag_bread
107
- @tag_bread.push(tag)
108
-
109
- case tag
110
- when 'entry'
111
- raise 'nest entry element' if @in_entry_elem
112
- @in_entry_elem = {:meta => {}, :body => nil}
113
- when 'title'
114
- if @in_entry_elem
115
- raise 'only <title type="text"></title> is supported' if attrs['type'] != 'text'
116
- end
117
- when 'category'
118
- if @in_entry_elem
119
- if attrs['scheme'] == 'http://www.blogger.com/atom/ns#'
120
- @in_entry_elem[:meta][:category] = [] unless @in_entry_elem[:meta][:category]
121
- @in_entry_elem[:meta][:category] << attrs['term']
122
- elsif attrs['scheme'] == 'http://schemas.google.com/g/2005#kind'
123
- kind = attrs['term']
124
- kind.sub!(Regexp.new("^http://schemas\\.google\\.com/blogger/2008/kind\\#"), '')
125
- @in_entry_elem[:meta][:kind] = kind
126
- end
127
- end
128
- when 'content'
129
- if @in_entry_elem
130
- @in_entry_elem[:meta][:content_type] = attrs['type']
131
- end
132
- when 'link'
133
- if @in_entry_elem
134
- if attrs['rel'] == 'alternate' && attrs['type'] == 'text/html'
135
- @in_entry_elem[:meta][:original_url] = attrs['href']
136
- elsif attrs['rel'] == 'replies' && attrs['type'] == 'text/html'
137
- unless @in_entry_elem[:meta][:original_url]
138
- @in_entry_elem[:meta][:original_url] = attrs['href'].sub(/\#comment-form$/, '')
139
- end
140
- end
141
- end
142
- when 'media:thumbnail'
143
- if @in_entry_elem
144
- @in_entry_elem[:meta][:thumbnail] = attrs['url']
145
- end
146
- end
147
- end
148
-
149
- def text(text)
150
- if @in_entry_elem
151
- case @tag_bread.last
152
- when 'id'
153
- @in_entry_elem[:meta][:id] = text
154
- when 'published'
155
- @in_entry_elem[:meta][:published] = text
156
- when 'updated'
157
- @in_entry_elem[:meta][:updated] = text
158
- when 'title'
159
- @in_entry_elem[:meta][:title] = text
160
- when 'content'
161
- @in_entry_elem[:body] = text
162
- when 'name'
163
- if @tag_bread[-2..-1] == %w[author name]
164
- @in_entry_elem[:meta][:author] = text
165
- end
166
- when 'app:draft'
167
- if @tag_bread[-2..-1] == %w[app:control app:draft]
168
- @in_entry_elem[:meta][:draft] = true if text == 'yes'
169
- end
170
- end
171
- end
172
- end
173
-
174
- def tag_end(tag)
175
- case tag
176
- when 'entry'
177
- raise 'nest entry element' unless @in_entry_elem
178
-
179
- if @in_entry_elem[:meta][:kind] == 'post'
180
- post_data = get_post_data_from_in_entry_elem_info
181
-
182
- if post_data
183
- target_dir = '_posts'
184
- target_dir = '_drafts' if @in_entry_elem[:meta][:draft]
185
-
186
- FileUtils.mkdir_p(target_dir)
187
-
188
- file_name = URI::decode("#{post_data[:filename]}.html")
189
- File.open(File.join(target_dir, file_name), 'w') do |f|
190
- f.flock(File::LOCK_EX)
191
-
192
- f << post_data[:header].to_yaml
193
- f << "---\n\n"
194
- f << post_data[:body]
195
- end
196
- end
197
- end
198
-
199
- @in_entry_elem = nil
200
- end
201
-
202
- @tag_bread.pop
203
- end
204
-
205
- def get_post_data_from_in_entry_elem_info
206
- if (@in_entry_elem.nil? || ! @in_entry_elem.has_key?(:meta) || ! @in_entry_elem[:meta].has_key?(:kind))
207
- nil
208
- elsif @in_entry_elem[:meta][:kind] == 'post'
209
- timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime('%Y-%m-%d')
210
- if @in_entry_elem[:meta][:original_url]
211
- original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
212
- original_path = original_uri.path.to_s
213
- filename = "%s-%s" %
214
- [timestamp,
215
- File.basename(original_path, File.extname(original_path))]
216
-
217
- @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
218
- elsif @in_entry_elem[:meta][:draft]
219
- # Drafts don't have published urls
220
- name = @in_entry_elem[:meta][:title]
221
- if name.nil?
222
- filename = timestamp
223
- else
224
- filename = "%s-%s" %
225
- [timestamp,
226
- CGI.escape(name.downcase).tr('+','-')]
227
- end
228
- else
229
- raise 'Original URL is missing'
230
- end
231
-
232
- header = {
233
- 'layout' => 'post',
234
- 'title' => @in_entry_elem[:meta][:title],
235
- 'date' => @in_entry_elem[:meta][:published],
236
- 'author' => @in_entry_elem[:meta][:author],
237
- 'tags' => @in_entry_elem[:meta][:category],
238
- }
239
- header['modified_time'] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published]
240
- header['thumbnail'] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail]
241
- header['blogger_id'] = @in_entry_elem[:meta][:id] if @leave_blogger_info
242
- header['blogger_orig_url'] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url]
243
-
244
- body = @in_entry_elem[:body]
245
-
246
- # body escaping associated with liquid
247
- if body =~ /{{/
248
- body.gsub!(/{{/, '{{ "{{" }}')
249
- end
250
- if body =~ /{%/
251
- body.gsub!(/{%/, '{{ "{%" }}')
252
- end
253
-
254
- { :filename => filename, :header => header, :body => body }
255
- else
256
- nil
257
- end
258
- end
259
-
260
- end
261
-
262
- end
263
- end
264
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class Blogger < Importer
4
+ def self.specify_options(c)
5
+ c.option 'source', '--source NAME', 'The XML file (blog-MM-DD-YYYY.xml) path to import'
6
+ c.option 'no-blogger-info', '--no-blogger-info', 'not to leave blogger-URL info (id and old URL) in the front matter (default: false)'
7
+ c.option 'replace-internal-link', '--replace-internal-link', 'replace internal links using the post_url liquid tag. (default: false)'
8
+ c.option 'comments', '--comments', 'import comments to _comments collection'
9
+ end
10
+
11
+ def self.validate(options)
12
+ if options['source'].nil?
13
+ raise 'Missing mandatory option: --source'
14
+ elsif not File.exist?(options['source'])
15
+ raise Errno::ENOENT, "File not found: #{options['source']}"
16
+ end
17
+ end
18
+
19
+ def self.require_deps
20
+ BuntoImport.require_with_fallback(%w[
21
+ rexml/document
22
+ rexml/streamlistener
23
+ rexml/parsers/streamparser
24
+ uri
25
+ time
26
+ fileutils
27
+ safe_yaml
28
+ open-uri
29
+ ])
30
+ end
31
+
32
+ # Process the import.
33
+ #
34
+ # source:: a local file String (or IO object for internal use purpose)..
35
+ # no-blogger-info:: a boolean if not leave blogger info (id and original URL).
36
+ # replace-internal-link:: a boolean if replace internal link
37
+ #
38
+ # Returns nothing.
39
+ def self.process(options)
40
+ source = options.fetch('source')
41
+
42
+ listener = BloggerAtomStreamListener.new
43
+
44
+ listener.leave_blogger_info = ! options.fetch('no-blogger-info', false),
45
+ listener.comments = options.fetch('comments', false),
46
+
47
+ File.open(source, 'r') do |f|
48
+ f.flock(File::LOCK_SH)
49
+ REXML::Parsers::StreamParser.new(f, listener).parse()
50
+ end
51
+
52
+ options['original-url-base'] = listener.original_url_base
53
+
54
+ postprocess(options)
55
+ end
56
+
57
+ # Post-process after import.
58
+ #
59
+ # replace-internal-link:: a boolean if replace internal link
60
+ #
61
+ # Returns nothing.
62
+ def self.postprocess(options)
63
+ # Replace internal link URL
64
+ if options.fetch('replace-internal-link', false)
65
+ original_url_base = options.fetch('original-url-base', nil)
66
+ if original_url_base
67
+ orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
68
+
69
+ Dir.glob('_posts/*.*') do |filename|
70
+ body = nil
71
+ File.open(filename, 'r') do |f|
72
+ f.flock(File::LOCK_SH)
73
+ body = f.read
74
+ end
75
+
76
+ body.gsub!(orig_url_pattern) do
77
+ # for post_url
78
+ quote = $1
79
+ post_file = Dir.glob("_posts/#{$2}-#{$3}-*-#{$4.to_s.tr('/', '-')}").first
80
+ raise "Could not found: _posts/#{$2}-#{$3}-*-#{$4.to_s.tr('/', '-')}" if post_file.nil?
81
+ " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, '.html')} %}#{quote}"
82
+ end
83
+
84
+ File.open(filename, 'w') do |f|
85
+ f.flock(File::LOCK_EX)
86
+ f << body
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
92
+
93
+ class BloggerAtomStreamListener
94
+ def initialize
95
+ # use `extend` instead of `include` to use `require_deps` instead of `require`.
96
+ extend REXML::StreamListener
97
+ extend BloggerAtomStreamListenerMethods
98
+
99
+ @leave_blogger_info = true
100
+ @comments = false
101
+ end
102
+ end
103
+
104
+ module BloggerAtomStreamListenerMethods
105
+ attr_accessor :leave_blogger_info, :comments
106
+ attr_reader :original_url_base
107
+
108
+ def tag_start(tag, attrs)
109
+ @tag_bread = [] unless @tag_bread
110
+ @tag_bread.push(tag)
111
+
112
+ case tag
113
+ when 'entry'
114
+ raise 'nest entry element' if @in_entry_elem
115
+ @in_entry_elem = {:meta => {}, :body => nil}
116
+ when 'title'
117
+ if @in_entry_elem
118
+ raise 'only <title type="text"></title> is supported' if attrs['type'] != 'text'
119
+ end
120
+ when 'category'
121
+ if @in_entry_elem
122
+ if attrs['scheme'] == 'http://www.blogger.com/atom/ns#'
123
+ @in_entry_elem[:meta][:category] = [] unless @in_entry_elem[:meta][:category]
124
+ @in_entry_elem[:meta][:category] << attrs['term']
125
+ elsif attrs['scheme'] == 'http://schemas.google.com/g/2005#kind'
126
+ kind = attrs['term']
127
+ kind.sub!(Regexp.new("^http://schemas\\.google\\.com/blogger/2008/kind\\#"), '')
128
+ @in_entry_elem[:meta][:kind] = kind
129
+ end
130
+ end
131
+ when 'content'
132
+ if @in_entry_elem
133
+ @in_entry_elem[:meta][:content_type] = attrs['type']
134
+ end
135
+ when 'link'
136
+ if @in_entry_elem
137
+ if attrs['rel'] == 'alternate' && attrs['type'] == 'text/html'
138
+ @in_entry_elem[:meta][:original_url] = attrs['href']
139
+ elsif attrs['rel'] == 'replies' && attrs['type'] == 'text/html'
140
+ unless @in_entry_elem[:meta][:original_url]
141
+ @in_entry_elem[:meta][:original_url] = attrs['href'].sub(/\#comment-form$/, '')
142
+ end
143
+ end
144
+ end
145
+ when 'media:thumbnail'
146
+ if @in_entry_elem
147
+ @in_entry_elem[:meta][:thumbnail] = attrs['url']
148
+ end
149
+ when 'thr:in-reply-to'
150
+ if @in_entry_elem
151
+ @in_entry_elem[:meta][:post_id] = attrs['ref']
152
+ end
153
+ end
154
+ end
155
+
156
+ def text(text)
157
+ if @in_entry_elem
158
+ case @tag_bread.last
159
+ when 'id'
160
+ @in_entry_elem[:meta][:id] = text
161
+ when 'published'
162
+ @in_entry_elem[:meta][:published] = text
163
+ when 'updated'
164
+ @in_entry_elem[:meta][:updated] = text
165
+ when 'title'
166
+ @in_entry_elem[:meta][:title] = text
167
+ when 'content'
168
+ @in_entry_elem[:body] = text
169
+ when 'name'
170
+ if @tag_bread[-2..-1] == %w[author name]
171
+ @in_entry_elem[:meta][:author] = text
172
+ end
173
+ when 'app:draft'
174
+ if @tag_bread[-2..-1] == %w[app:control app:draft]
175
+ @in_entry_elem[:meta][:draft] = true if text == 'yes'
176
+ end
177
+ end
178
+ end
179
+ end
180
+
181
+ def tag_end(tag)
182
+ case tag
183
+ when 'entry'
184
+ raise 'nest entry element' unless @in_entry_elem
185
+
186
+ if @in_entry_elem[:meta][:kind] == 'post'
187
+ post_data = get_post_data_from_in_entry_elem_info
188
+
189
+ if post_data
190
+ target_dir = '_posts'
191
+ target_dir = '_drafts' if @in_entry_elem[:meta][:draft]
192
+
193
+ FileUtils.mkdir_p(target_dir)
194
+
195
+ file_name = URI::decode("#{post_data[:filename]}.html")
196
+ File.open(File.join(target_dir, file_name), 'w') do |f|
197
+ f.flock(File::LOCK_EX)
198
+
199
+ f << post_data[:header].to_yaml
200
+ f << "---\n\n"
201
+ f << post_data[:body]
202
+ end
203
+ end
204
+ elsif @in_entry_elem[:meta][:kind] == 'comment' and @comments
205
+ post_data = get_post_data_from_in_entry_elem_info
206
+
207
+ if post_data
208
+ target_dir = '_comments'
209
+
210
+ FileUtils.mkdir_p(target_dir)
211
+
212
+ file_name = URI::decode("#{post_data[:filename]}.html")
213
+ File.open(File.join(target_dir, file_name), 'w') do |f|
214
+ f.flock(File::LOCK_EX)
215
+
216
+ f << post_data[:header].to_yaml
217
+ f << "---\n\n"
218
+ f << post_data[:body]
219
+ end
220
+ end
221
+ end
222
+
223
+ @in_entry_elem = nil
224
+ end
225
+
226
+ @tag_bread.pop
227
+ end
228
+
229
+ def get_post_data_from_in_entry_elem_info
230
+ if (@in_entry_elem.nil? || ! @in_entry_elem.has_key?(:meta) || ! @in_entry_elem[:meta].has_key?(:kind))
231
+ nil
232
+ elsif @in_entry_elem[:meta][:kind] == 'post'
233
+ timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime('%Y-%m-%d')
234
+ if @in_entry_elem[:meta][:original_url]
235
+ original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
236
+ original_path = original_uri.path.to_s
237
+ filename = "%s-%s" %
238
+ [timestamp,
239
+ File.basename(original_path, File.extname(original_path))]
240
+
241
+ @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
242
+ elsif @in_entry_elem[:meta][:draft]
243
+ # Drafts don't have published urls
244
+ name = @in_entry_elem[:meta][:title]
245
+ if name.nil?
246
+ filename = timestamp
247
+ else
248
+ filename = "%s-%s" %
249
+ [timestamp,
250
+ CGI.escape(name.downcase).tr('+','-')]
251
+ end
252
+ else
253
+ raise 'Original URL is missing'
254
+ end
255
+
256
+ header = {
257
+ 'layout' => 'post',
258
+ 'title' => @in_entry_elem[:meta][:title],
259
+ 'date' => @in_entry_elem[:meta][:published],
260
+ 'author' => @in_entry_elem[:meta][:author],
261
+ 'tags' => @in_entry_elem[:meta][:category],
262
+ }
263
+ header['modified_time'] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published]
264
+ header['thumbnail'] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail]
265
+ header['blogger_id'] = @in_entry_elem[:meta][:id] if @leave_blogger_info
266
+ header['blogger_orig_url'] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url]
267
+
268
+ body = @in_entry_elem[:body]
269
+
270
+ # body escaping associated with liquid
271
+ if body =~ /{{/
272
+ body.gsub!(/{{/, '{{ "{{" }}')
273
+ end
274
+ if body =~ /{%/
275
+ body.gsub!(/{%/, '{{ "{%" }}')
276
+ end
277
+
278
+ { :filename => filename, :header => header, :body => body }
279
+ elsif @in_entry_elem[:meta][:kind] == 'comment'
280
+ timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime('%Y-%m-%d')
281
+ if @in_entry_elem[:meta][:original_url]
282
+ if not @comment_seq
283
+ @comment_seq = 1
284
+ end
285
+
286
+ original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
287
+ original_path = original_uri.path.to_s
288
+ filename = "%s-%s-%s" %
289
+ [timestamp,
290
+ File.basename(original_path, File.extname(original_path)),
291
+ @comment_seq]
292
+
293
+ @comment_seq = @comment_seq + 1
294
+
295
+ @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
296
+ else
297
+ raise 'Original URL is missing'
298
+ end
299
+
300
+ header = {
301
+ 'date' => @in_entry_elem[:meta][:published],
302
+ 'author' => @in_entry_elem[:meta][:author],
303
+ 'blogger_post_id' => @in_entry_elem[:meta][:post_id],
304
+ }
305
+ header['modified_time'] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published]
306
+ header['thumbnail'] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail]
307
+ header['blogger_id'] = @in_entry_elem[:meta][:id] if @leave_blogger_info
308
+ header['blogger_orig_url'] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url]
309
+
310
+ body = @in_entry_elem[:body]
311
+
312
+ # body escaping associated with liquid
313
+ if body =~ /{{/
314
+ body.gsub!(/{{/, '{{ "{{" }}')
315
+ end
316
+ if body =~ /{%/
317
+ body.gsub!(/{%/, '{{ "{%" }}')
318
+ end
319
+
320
+ { :filename => filename, :header => header, :body => body }
321
+ else
322
+ nil
323
+ end
324
+ end
325
+
326
+ end
327
+
328
+ end
329
+ end
330
+ end