bunto-import 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 05b2589a386dfe73ec5e8fa3f0988e95d2b982dc
4
+ data.tar.gz: fdfee7813f47b1e34da4b3b178e1587d4133bc55
5
+ SHA512:
6
+ metadata.gz: 2e14ddd53cfb6e29b460c20b82e3a7ddc9f033202912793c540ae3fc79240b65f459cc5f08b65c6007b7b57b219ffeea71f2cea3d34e05c1a8ea570ec7befcfb
7
+ data.tar.gz: 288de4e071628f790bd049d04a21318673fcd810a159a13a5de739d3a022909b5e4d4229d7b866e00fae16a6fd057dbbb3b958205ae27502ecb26ec7f265e918
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016-present Tom Preston-Werner
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,32 @@
1
+ # bunto-import
2
+
3
+ [![Build Status](https://travis-ci.org/bunto/bunto-import.svg?branch=master)](https://travis-ci.org/bunto/bunto-import)
4
+
5
+ The new __Bunto__ command for importing from various blogs to Bunto format.
6
+
7
+ **Note: _migrators_ are now called _importers_ and are only available if one installs the `bunto-import` _gem_.**
8
+
9
+ ## How `bunto-import` works:
10
+
11
+ ### Bunto v2.x and higher
12
+
13
+ 1. Install the _rubygem_ with `gem install bunto-import`.
14
+ 2. Run `bunto import IMPORTER [options]`
15
+
16
+ ### Bunto v1.x
17
+
18
+ Launch IRB:
19
+
20
+ ```ruby
21
+ # 1. Require bunto-import
22
+ irb> require 'bunto-import'
23
+ # 2. Choose the importer you'd like to use.
24
+ irb> importer_class = "Behance" # an example, there are many others!
25
+ # 3. Run it!
26
+ irb> BuntoImport::Importers.const_get(importer_class).run(options_hash)
27
+ ```
28
+
29
+ ## Documentation
30
+
31
+ bunto-import has its own documentation site, found at https://bunto.github.io/.
32
+ Dedicated [documentation for each importer](https://bunto.github.io/import/docs/home/) is available there.
@@ -0,0 +1,49 @@
1
+ $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
2
+ require 'rubygems'
3
+ require 'bunto'
4
+ require 'bunto/commands/import'
5
+ require 'colorator'
6
+
7
+ require 'bunto-import/importer'
8
+ require 'bunto-import/importers'
9
+ require 'bunto-import/util'
10
+
11
module BuntoImport
  # Public: Register one subcommand per known importer on the given command.
  #
  # cmd - the parent command object (the original note describes it as an
  #       instance of Mercenary::Command). It must respond to #command and
  #       yield an object that responds to #syntax and #action.
  #
  # Returns an Array of the registered subcommand names (the downcased,
  # un-namespaced class name of every Importer subclass).
  def self.add_importer_commands(cmd)
    BuntoImport::Importer.subclasses.map do |importer_class|
      command_name = importer_class.to_s.split("::").last.downcase

      cmd.command(command_name.to_sym) do |subcommand|
        subcommand.syntax "#{command_name} [options]"
        importer_class.specify_options(subcommand)
        subcommand.action { |_, options| importer_class.run(options) }
      end

      command_name
    end
  end

  # Public: Require each named library, aborting with installation hints
  # when one of them cannot be loaded.
  #
  # gems - a String or an (optionally nested) Array of library names.
  #
  # Returns the flattened Array of names when everything loaded.
  def self.require_with_fallback(gems)
    [gems].flatten.each do |library|
      begin
        require library
      rescue LoadError
        hints = [
          "Whoops! Looks like you need to install '#{library}' before you can use this importer.",
          "",
          "If you're using bundler:",
          " 1. Add 'gem \"#{library}\"' to your Gemfile",
          " 2. Run 'bundle install'",
          "",
          "If you're not using bundler:"
        ]
        hints.each { |line| Bunto.logger.error line }
        # The final line goes through abort_with, as in the original sequence.
        Bunto.logger.abort_with " 1. Run 'gem install #{library}'."
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
module BuntoImport
  # Base class for every importer. Subclasses are recorded automatically and
  # are expected to provide `require_deps` and `process(options)`; they may
  # also provide `validate(options)`.
  class Importer
    # Internal: Ruby hook fired when a class inherits from this one.
    # Records the new class in the receiver's subclass list.
    #
    # base - the inheriting Class.
    def self.inherited(base)
      subclasses << base
    end

    # Public: The classes recorded by `inherited` on this class.
    #
    # Returns an Array of Classes (empty until something inherits).
    def self.subclasses
      @subclasses ||= []
    end

    # Public: Build a copy of a Hash whose keys are converted with #to_s.
    #
    # hash - the Hash to convert; it is never modified.
    #
    # Returns a new, unfrozen Hash.
    def self.stringify_keys(hash)
      # `dup` rather than `clone`: clone carries the frozen flag over, which
      # made the in-place rewrite below raise for frozen option hashes.
      copy = hash.dup
      copy.keys.each do |key|
        copy[string_key_for(key)] = copy.delete(key)
      end
      copy
    end

    # Public: Run the importer: normalise option keys, load dependencies,
    # validate when the importer defines `validate`, then process.
    #
    # options - a Hash of options with String or Symbol keys (default: {}).
    #
    # Returns whatever the importer's `process` returns.
    def self.run(options = {})
      opts = stringify_keys(options)
      require_deps
      validate(opts) if respond_to?(:validate)
      process(opts)
    end

    # Internal: Best-effort String form of a key. Falls back to the key
    # itself when #to_s raises or returns nil, matching the previous
    # `(key.to_s rescue key) || key` behaviour without a bare rescue modifier.
    def self.string_key_for(key)
      key.to_s || key
    rescue StandardError
      key
    end
    private_class_method :string_key_for
  end
end
@@ -0,0 +1,10 @@
1
module BuntoImport
  # Namespace for the concrete importers. Loading this file requires every
  # Ruby file found in the sibling "importers" directory.
  module Importers
    importers_dir = File.expand_path("importers", File.dirname(__FILE__))

    # List the directory directly instead of using Dir.chdir, which changes
    # the working directory of the whole process while files are being
    # required. Sorting makes the load order independent of the filesystem.
    Dir.entries(importers_dir).sort.each do |entry|
      next if entry.start_with?(".")
      next unless File.extname(entry) == ".rb"
      require "bunto-import/importers/#{entry}"
    end
  end
end
@@ -0,0 +1,80 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Behance < Importer
4
# Internal: Load the libraries this importer uses, via
# BuntoImport.require_with_fallback.
#
# Returns the result of BuntoImport.require_with_fallback.
def self.require_deps
  dependencies = %w[fileutils safe_yaml date time behance]
  BuntoImport.require_with_fallback(dependencies)
end
13
+
14
# Internal: Declare the command-line options this importer accepts.
#
# c - the command object; it must respond to #option(name, switch, text).
#
# Returns the result of the last #option call.
def self.specify_options(c)
  user_spec  = ['user', '--user NAME', 'The username of the account']
  token_spec = ['api_token', '--api_token TOKEN', 'The API access token for the account']

  c.option(*user_spec)
  c.option(*token_spec)
end
18
+
19
# Internal: Abort the program when a mandatory option is absent.
#
# options - a Hash with String keys; 'user' and 'api_token' must be non-nil.
#
# Returns the Array of checked option names when all are present.
def self.validate(options)
  %w[user api_token].each do |required|
    next unless options[required].nil?
    abort "Missing mandatory option --#{required}."
  end
end
26
+
27
# Process the import: fetch every project of a Behance user and write one
# Markdown post per project into "_posts".
#
# options - a Hash with String keys:
#           'user'      - the behance user to retrieve projects (ID or username)
#           'api_token' - your developer API Token
#
# Raises KeyError when either option is missing.
# Returns nothing.
def self.process(options)
  user  = options.fetch('user')
  token = options.fetch('api_token')

  client = fetch_behance(token)
  user_projects = client.user_projects(user)

  puts "#{user_projects.length} project(s) found. Importing now..."

  user_projects.each do |project|
    details = client.project(project['id'])
    title = project['name'].to_s
    # NOTE: Time.at interprets the timestamp in the local time zone.
    formatted_date = Time.at(project['published_on'].to_i).to_date.to_s

    # Slug: split on separator characters, drop empty pieces, downcase.
    # The dollar sign is escaped so that it is a literal separator; left
    # unescaped it is an end-of-line anchor and "$" stayed in file names.
    post_name = title.split(%r{ |!|/|:|&|-|\$|,})
                     .reject(&:empty?)
                     .map(&:downcase)
                     .join('-')

    name = "#{formatted_date}-#{post_name}"

    header = {
      "layout"  => "post",
      "title"   => title,
      "details" => details
    }

    FileUtils.mkdir_p("_posts")

    File.open("_posts/#{name}.md", "w") do |f|
      f.puts header.to_yaml
      f.puts "---\n\n"
      f.puts details['description'].to_s
    end
  end

  puts "Finished importing."
end
72
+
73
+ private
74
+
75
# Internal: Build the Behance API client.
#
# token - the API access token handed to the client as :access_token.
#
# Returns a ::Behance::Client.
def self.fetch_behance(token)
  ::Behance::Client.new(:access_token => token)
end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,264 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Blogger < Importer
4
# Internal: Declare the command-line options this importer accepts.
#
# c - the command object; it must respond to #option(name, switch, text).
#
# Returns the result of the last #option call.
def self.specify_options(c)
  source_spec = ['source', '--source NAME', 'The XML file (blog-MM-DD-YYYY.xml) path to import']
  info_spec   = ['no-blogger-info', '--no-blogger-info', 'not to leave blogger-URL info (id and old URL) in the front matter (default: false)']
  link_spec   = ['replace-internal-link', '--replace-internal-link', 'replace internal links using the post_url liquid tag. (default: false)']

  c.option(*source_spec)
  c.option(*info_spec)
  c.option(*link_spec)
end
9
+
10
# Internal: Check that the export file option is present and points at an
# existing file.
#
# options - a Hash with String keys; 'source' is the path to the XML file.
#
# Raises RuntimeError when 'source' is nil.
# Raises Errno::ENOENT when the file does not exist.
# Returns nil.
def self.validate(options)
  source = options['source']

  raise 'Missing mandatory option: --source' if source.nil?
  raise Errno::ENOENT, "File not found: #{source}" unless File.exist?(source)
end
17
+
18
# Internal: Load the libraries this importer uses, via
# BuntoImport.require_with_fallback.
#
# `cgi` is listed because the stream listener calls CGI.escape when it names
# draft posts; previously that only worked if some other library had already
# loaded it.
#
# Returns the result of BuntoImport.require_with_fallback.
def self.require_deps
  BuntoImport.require_with_fallback(%w[
    rexml/document
    rexml/streamlistener
    rexml/parsers/streamparser
    uri
    cgi
    time
    fileutils
    safe_yaml
    open-uri
  ])
end
30
+
31
# Process the import: stream-parse the Blogger Atom export with a
# BloggerAtomStreamListener, then run the post-processing step.
#
# options - a Hash with String keys:
#           'source'                - a local file String (or IO object for
#                                     internal use purpose).
#           'no-blogger-info'       - a boolean; when true, blogger info (id
#                                     and original URL) is not left in the
#                                     front matter.
#           'replace-internal-link' - a boolean; when true, internal links
#                                     are replaced during post-processing.
#           The key 'original-url-base' is added to this Hash as a side effect.
#
# Raises KeyError when 'source' is missing.
# Returns the result of postprocess.
def self.process(options)
  source = options.fetch('source')

  listener = BloggerAtomStreamListener.new

  # This statement must end here. A trailing comma used to join it with the
  # File.open call below into one array assignment that ran only after
  # parsing, so the 'no-blogger-info' option never reached the listener.
  listener.leave_blogger_info = !options.fetch('no-blogger-info', false)

  File.open(source, 'r') do |f|
    f.flock(File::LOCK_SH)
    REXML::Parsers::StreamParser.new(f, listener).parse
  end

  options['original-url-base'] = listener.original_url_base

  postprocess(options)
end
54
+
55
# Post-process after import: rewrite links between imported posts so they
# use the `post_url` Liquid tag.
#
# options - a Hash with String keys:
#           'replace-internal-link' - a boolean; nothing happens unless true.
#           'original-url-base'     - scheme and host of the original blog
#                                     (e.g. "http://example.blogspot.com");
#                                     nothing happens when it is nil.
#
# Every file matching "_posts/*.*" is read and written back. Links of the
# form /YYYY/MM/name.html, optionally prefixed with the original URL base,
# are replaced.
#
# Raises RuntimeError when a linked post has no matching file in "_posts".
# Returns nothing.
def self.postprocess(options)
  return unless options.fetch('replace-internal-link', false)

  original_url_base = options.fetch('original-url-base', nil)
  return unless original_url_base

  # A regexp literal keeps `\.` as a literal dot. The pattern used to be
  # built from a double-quoted string, where "\." collapses to "." and so
  # matched any character before "html".
  orig_url_pattern = %r{ href=(["'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^"']+\.html)\1}

  Dir.glob('_posts/*.*') do |filename|
    body = File.open(filename, 'r') do |f|
      f.flock(File::LOCK_SH)
      f.read
    end

    body.gsub!(orig_url_pattern) do
      link = Regexp.last_match
      quote = link[1]
      # Imported files are named YYYY-MM-DD-name.html; the day is not part
      # of the URL, hence the wildcard.
      post_glob = "_posts/#{link[2]}-#{link[3]}-*-#{link[4].tr('/', '-')}"
      post_file = Dir.glob(post_glob).first
      raise "Could not found: #{post_glob}" if post_file.nil?
      " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, '.html')} %}#{quote}"
    end

    File.open(filename, 'w') do |f|
      f.flock(File::LOCK_EX)
      f << body
    end
  end
end
90
+
91
# Listener object handed to the REXML stream parser. Its behaviour comes
# from the two modules mixed into each instance.
class BloggerAtomStreamListener
  def initialize
    # `extend` (per instance) is used instead of `include` (per class) so the
    # libraries can be loaded through `require_deps` rather than a plain
    # `require`: the module constants are only looked up here, at
    # instantiation time.
    [REXML::StreamListener, BloggerAtomStreamListenerMethods].each do |mixin|
      extend mixin
    end

    @leave_blogger_info = true
  end
end
100
+
101
+ module BloggerAtomStreamListenerMethods
102
+ attr_accessor :leave_blogger_info
103
+ attr_reader :original_url_base
104
+
105
# Stream callback: an element was opened.
#
# Pushes the tag onto the breadcrumb stack (@tag_bread). An 'entry' tag
# starts a new entry record (@in_entry_elem); inside an entry, attributes of
# title, category, content, link and media:thumbnail tags are collected into
# the record's :meta Hash.
#
# tag   - the element name String.
# attrs - a Hash of attribute name => value Strings; it is not modified.
#
# Raises RuntimeError for a nested 'entry', or for a title inside an entry
# whose type is not "text".
def tag_start(tag, attrs)
  @tag_bread ||= []
  @tag_bread.push(tag)

  if tag == 'entry'
    raise 'nest entry element' if @in_entry_elem
    @in_entry_elem = { :meta => {}, :body => nil }
    return
  end

  # Everything below only matters while inside an entry.
  return unless @in_entry_elem
  meta = @in_entry_elem[:meta]

  case tag
  when 'title'
    raise 'only <title type="text"></title> is supported' if attrs['type'] != 'text'
  when 'category'
    case attrs['scheme']
    when 'http://www.blogger.com/atom/ns#'
      (meta[:category] ||= []) << attrs['term']
    when 'http://schemas.google.com/g/2005#kind'
      # Non-destructive `sub`: the previous `sub!` edited the caller's
      # attribute string in place and raised for frozen strings.
      meta[:kind] = attrs['term'].sub(%r{\Ahttp://schemas\.google\.com/blogger/2008/kind\#}, '')
    end
  when 'content'
    meta[:content_type] = attrs['type']
  when 'link'
    if attrs['type'] == 'text/html'
      if attrs['rel'] == 'alternate'
        meta[:original_url] = attrs['href']
      elsif attrs['rel'] == 'replies'
        # Fallback only: never overrides a URL that is already known.
        meta[:original_url] ||= attrs['href'].sub(/\#comment-form$/, '')
      end
    end
  when 'media:thumbnail'
    meta[:thumbnail] = attrs['url']
  end
end
148
+
149
# Stream callback: character data was read.
#
# Inside an entry, stores the text under the field that corresponds to the
# innermost open tag (the last element of @tag_bread). Text outside an entry,
# or under any other tag, is ignored.
#
# text - the character data String.
def text(text)
  return unless @in_entry_elem

  current_tag = @tag_bread.last
  parent_and_tag = @tag_bread[-2..-1]

  # Tags whose text is copied straight into the meta Hash.
  direct_fields = {
    'id'        => :id,
    'published' => :published,
    'updated'   => :updated,
    'title'     => :title
  }

  if direct_fields.key?(current_tag)
    @in_entry_elem[:meta][direct_fields[current_tag]] = text
  elsif current_tag == 'content'
    @in_entry_elem[:body] = text
  elsif current_tag == 'name'
    # Only a <name> directly inside <author> is the author.
    @in_entry_elem[:meta][:author] = text if parent_and_tag == %w[author name]
  elsif current_tag == 'app:draft'
    if parent_and_tag == %w[app:control app:draft] && text == 'yes'
      @in_entry_elem[:meta][:draft] = true
    end
  end
end
173
+
174
# Stream callback: an element was closed.
#
# When an 'entry' of kind 'post' ends, the collected data is written to
# "<target>/<filename>.html", where the target directory is "_drafts" for
# drafts and "_posts" otherwise. The entry record is then cleared. In every
# case the tag is popped from the breadcrumb stack.
#
# tag - the element name String.
#
# Raises RuntimeError when an 'entry' ends while no entry is open.
def tag_end(tag)
  if tag == 'entry'
    raise 'nest entry element' unless @in_entry_elem

    if @in_entry_elem[:meta][:kind] == 'post'
      post_data = get_post_data_from_in_entry_elem_info

      if post_data
        target_dir = @in_entry_elem[:meta][:draft] ? '_drafts' : '_posts'
        FileUtils.mkdir_p(target_dir)

        # URI.decode was removed in Ruby 3.0. The default parser's #unescape
        # performs the same percent-decoding and, like the old call, leaves
        # "+" untouched.
        file_name = URI::DEFAULT_PARSER.unescape("#{post_data[:filename]}.html")
        File.open(File.join(target_dir, file_name), 'w') do |f|
          f.flock(File::LOCK_EX)

          f << post_data[:header].to_yaml
          f << "---\n\n"
          f << post_data[:body]
        end
      end
    end

    @in_entry_elem = nil
  end

  @tag_bread.pop
end
204
+
205
# Build the file name, front matter and body for the entry currently held in
# @in_entry_elem.
#
# Also sets @original_url_base ("scheme://host") when the entry has an
# original URL, and escapes Liquid delimiters in the entry body in place.
#
# Raises RuntimeError when a non-draft post has no original URL.
# Returns a Hash with :filename, :header and :body, or nil when there is no
# entry, no :meta/:kind information, or the kind is not 'post'.
def get_post_data_from_in_entry_elem_info
  entry = @in_entry_elem
  return nil if entry.nil? || !entry.has_key?(:meta) || !entry[:meta].has_key?(:kind)

  meta = entry[:meta]
  return nil unless meta[:kind] == 'post'

  timestamp = Time.parse(meta[:published]).strftime('%Y-%m-%d')

  if meta[:original_url]
    original_uri = URI.parse(meta[:original_url])
    original_path = original_uri.path.to_s
    slug = File.basename(original_path, File.extname(original_path))
    filename = "#{timestamp}-#{slug}"

    @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
  elsif meta[:draft]
    # Drafts don't have published urls, so the name is derived from the title.
    name = meta[:title]
    filename = if name.nil?
                 timestamp
               else
                 "#{timestamp}-#{CGI.escape(name.downcase).tr('+', '-')}"
               end
  else
    raise 'Original URL is missing'
  end

  header = {
    'layout' => 'post',
    'title'  => meta[:title],
    'date'   => meta[:published],
    'author' => meta[:author],
    'tags'   => meta[:category],
  }

  updated = meta[:updated]
  header['modified_time'] = updated if updated && updated != meta[:published]
  header['thumbnail'] = meta[:thumbnail] if meta[:thumbnail]
  if @leave_blogger_info
    header['blogger_id'] = meta[:id]
    header['blogger_orig_url'] = meta[:original_url] if meta[:original_url]
  end

  body = entry[:body]

  # Escape Liquid delimiters so the body is not interpreted as Liquid.
  if body
    body.gsub!('{{', '{{ "{{" }}')
    body.gsub!('{%', '{{ "{%" }}')
  end

  { :filename => filename, :header => header, :body => body }
end
259
+
260
+ end
261
+
262
+ end
263
+ end
264
+ end