bunto-import 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 05b2589a386dfe73ec5e8fa3f0988e95d2b982dc
4
+ data.tar.gz: fdfee7813f47b1e34da4b3b178e1587d4133bc55
5
+ SHA512:
6
+ metadata.gz: 2e14ddd53cfb6e29b460c20b82e3a7ddc9f033202912793c540ae3fc79240b65f459cc5f08b65c6007b7b57b219ffeea71f2cea3d34e05c1a8ea570ec7befcfb
7
+ data.tar.gz: 288de4e071628f790bd049d04a21318673fcd810a159a13a5de739d3a022909b5e4d4229d7b866e00fae16a6fd057dbbb3b958205ae27502ecb26ec7f265e918
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016-present Tom Preston-Werner
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,32 @@
1
+ # bunto-import
2
+
3
+ [![Build Status](https://travis-ci.org/bunto/bunto-import.svg?branch=master)](https://travis-ci.org/bunto/bunto-import)
4
+
5
+ The new __Bunto__ command for importing from various blogs to Bunto format.
6
+
7
+ **Note: _migrators_ are now called _importers_ and are only available if one installs the `bunto-import` _gem_.**
8
+
9
+ ## How `bunto-import` works:
10
+
11
+ ### Bunto v2.x and higher
12
+
13
+ 1. Install the _rubygem_ with `gem install bunto-import`.
14
+ 2. Run `bunto import IMPORTER [options]`
15
+
16
+ ### Bunto v1.x
17
+
18
+ Launch IRB:
19
+
20
+ ```ruby
21
+ # 1. Require bunto-import
22
+ irb> require 'bunto-import'
23
+ # 2. Choose the importer you'd like to use.
24
+ irb> importer_class = "Behance" # an example, there are many others!
25
+ # 3. Run it!
26
+ irb> BuntoImport::Importers.const_get(importer_class).run(options_hash)
27
+ ```
28
+
29
+ ## Documentation
30
+
31
+ bunto-import has its own documentation site, found at https://bunto.github.io/.
32
+ Dedicated [documentation for each importer](https://bunto.github.io/import/docs/home/) is available there.
@@ -0,0 +1,49 @@
1
+ $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
2
+ require 'rubygems'
3
+ require 'bunto'
4
+ require 'bunto/commands/import'
5
+ require 'colorator'
6
+
7
+ require 'bunto-import/importer'
8
+ require 'bunto-import/importers'
9
+ require 'bunto-import/util'
10
+
11
module BuntoImport
  # Public: Register one subcommand per known importer on the given command.
  #
  # cmd - the parent command object (a Mercenary::Command according to the
  #       original comment). Only `cmd.command(name) { |c| ... }` is used.
  #
  # Returns an Array of the subcommand names, in registration order.
  def self.add_importer_commands(cmd)
    BuntoImport::Importer.subclasses.map do |importer|
      # e.g. "BuntoImport::Importers::Behance" becomes "behance"
      subcommand = importer.to_s.split("::").last.downcase

      cmd.command(subcommand.to_sym) do |c|
        c.syntax "#{subcommand} [options]"
        importer.specify_options(c)
        c.action { |_, options| importer.run(options) }
      end

      subcommand
    end
  end

  # Public: Require each of the given libraries, printing installation
  # instructions through Bunto.logger and aborting on the first one that
  # cannot be loaded.
  #
  # gems - a library name or a (possibly nested) Array of library names.
  #
  # Returns the flattened Array of names when every require succeeds.
  def self.require_with_fallback(gems)
    [gems].flatten.each do |gem_name|
      begin
        require gem_name
      rescue LoadError
        [
          "Whoops! Looks like you need to install '#{gem_name}' before you can use this importer.",
          "",
          "If you're using bundler:",
          " 1. Add 'gem \"#{gem_name}\"' to your Gemfile",
          " 2. Run 'bundle install'",
          "",
          "If you're not using bundler:"
        ].each { |line| Bunto.logger.error line }
        Bunto.logger.abort_with " 1. Run 'gem install #{gem_name}'."
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
module BuntoImport
  # Base class for importers. Subclasses are recorded as they are defined and
  # are expected to provide `require_deps` and `process`, plus an optional
  # `validate`, as class methods.
  class Importer
    class << self
      # Hook invoked by Ruby when a class inherits from the receiver; records
      # the new class in the receiver's `subclasses` list.
      def inherited(base)
        subclasses << base
      end

      # Returns the Array of classes recorded by `inherited` (lazily created).
      def subclasses
        @subclasses ||= []
      end

      # Returns a clone of `hash` in which every key has been replaced by its
      # `to_s` form. The key itself is kept when `to_s` raises or returns a
      # falsy value. The argument is not modified.
      def stringify_keys(hash)
        copy = hash.clone
        copy.keys.each do |key|
          as_string =
            begin
              key.to_s
            rescue StandardError
              key
            end
          copy[as_string || key] = copy.delete(key)
        end
        copy
      end

      # Runs the importer: stringifies the option keys, loads dependencies,
      # validates when a `validate` method is available, then processes.
      #
      # Returns whatever `process` returns.
      def run(options = {})
        string_keyed = stringify_keys(options)
        require_deps
        validate(string_keyed) if respond_to?(:validate)
        process(string_keyed)
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module BuntoImport
  module Importers
    # Require every entry of the sibling "importers" directory whose name does
    # not start with a dot. The working directory is switched only for the
    # duration of the block.
    importers_dir = File.expand_path(File.join("importers"), File.dirname(__FILE__))

    Dir.chdir(importers_dir) do
      Dir.entries(".").reject { |entry| entry[0..0].eql?(".") }.each do |entry|
        require "bunto-import/importers/#{entry}"
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
module BuntoImport
  module Importers
    # Importer that writes one Markdown post under `_posts/` for each project
    # returned by the Behance client for a given user.
    class Behance < Importer
      # Loads the libraries this importer relies on.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          fileutils
          safe_yaml
          date
          time
          behance
        ])
      end

      # Declares the command-line options on the subcommand `c`.
      def self.specify_options(c)
        c.option 'user', '--user NAME', 'The username of the account'
        c.option 'api_token', '--api_token TOKEN', 'The API access token for the account'
      end

      # Aborts the program when a mandatory option is absent.
      def self.validate(options)
        %w[user api_token].each do |option|
          abort "Missing mandatory option --#{option}." if options[option].nil?
        end
      end

      # Process the import.
      #
      # user      - the behance user to retrieve projects (ID or username)
      # api_token - your developer API Token
      #
      # Returns nothing.
      def self.process(options)
        user  = options.fetch('user')
        token = options.fetch('api_token')

        client = fetch_behance(token)
        user_projects = client.user_projects(user)

        puts "#{user_projects.length} project(s) found. Importing now..."

        user_projects.each do |project|
          details = client.project(project['id'])
          title = project['name'].to_s
          formatted_date = Time.at(project['published_on'].to_i).to_date.to_s

          # Split the title on separator characters and join the remaining
          # words with dashes. The dollar sign is escaped so that it is
          # treated as a literal separator; unescaped it is an end-of-line
          # anchor and "$" would be left in the file name.
          post_name = title.split(%r{ |!|/|:|&|-|\$|,})
                           .reject(&:empty?)
                           .map(&:downcase)
                           .join('-')

          name = "#{formatted_date}-#{post_name}"

          header = {
            "layout"  => "post",
            "title"   => title,
            "details" => details
          }

          FileUtils.mkdir_p("_posts")

          File.open("_posts/#{name}.md", "w") do |f|
            f.puts header.to_yaml
            f.puts "---\n\n"
            f.puts details['description'].to_s
          end
        end

        puts "Finished importing."
      end

      # Builds the Behance API client for the given access token.
      #
      # NOTE: a bare `private` does not apply to `def self.` methods, so this
      # method has always been publicly callable. The ineffective keyword was
      # dropped and the visibility is left unchanged for existing callers.
      def self.fetch_behance(token)
        ::Behance::Client.new(access_token: token)
      end
    end
  end
end
@@ -0,0 +1,264 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Blogger < Importer
4
def self.specify_options(c)
  # Declares the three command-line options of the Blogger importer on `c`.
  c.option(
    'source',
    '--source NAME',
    'The XML file (blog-MM-DD-YYYY.xml) path to import'
  )
  c.option(
    'no-blogger-info',
    '--no-blogger-info',
    'not to leave blogger-URL info (id and old URL) in the front matter (default: false)'
  )
  c.option(
    'replace-internal-link',
    '--replace-internal-link',
    'replace internal links using the post_url liquid tag. (default: false)'
  )
end
9
+
10
def self.validate(options)
  # Ensures the 'source' option is present and names an existing file.
  #
  # Raises RuntimeError when the option is missing and Errno::ENOENT when the
  # file does not exist.
  source = options['source']
  raise 'Missing mandatory option: --source' if source.nil?
  raise Errno::ENOENT, "File not found: #{source}" unless File.exist?(source)
end
17
+
18
def self.require_deps
  # Loads the libraries the Blogger importer relies on.
  #
  # `cgi` is included because the stream listener calls `CGI.escape` when it
  # builds file names for drafts; without it that path depends on some other
  # library having loaded CGI first.
  BuntoImport.require_with_fallback(%w[
    rexml/document
    rexml/streamlistener
    rexml/parsers/streamparser
    uri
    cgi
    time
    fileutils
    safe_yaml
    open-uri
  ])
end
30
+
31
+ # Process the import.
32
+ #
33
+ # source:: a local file path String (or an IO object, for internal use).
34
+ # no-blogger-info:: a boolean; when true, Blogger info (id and original URL) is not kept.
35
+ # replace-internal-link:: a boolean; when true, internal links are replaced.
36
+ #
37
+ # Returns nothing.
38
def self.process(options)
  # Streams the Blogger export through the Atom listener, which writes the
  # posts, then hands over to `postprocess`.
  #
  # options - Hash with 'source' (required), 'no-blogger-info' and
  #           'replace-internal-link'. 'original-url-base' is added to it.
  source = options.fetch('source')

  listener = BloggerAtomStreamListener.new

  # This statement must not end with a comma. With a trailing comma Ruby
  # parses the following File.open call as a second right-hand value, so the
  # file is parsed before the flag is assigned and the flag then receives an
  # always-truthy Array, which made --no-blogger-info a no-op.
  listener.leave_blogger_info = !options.fetch('no-blogger-info', false)

  File.open(source, 'r') do |f|
    f.flock(File::LOCK_SH)
    REXML::Parsers::StreamParser.new(f, listener).parse
  end

  options['original-url-base'] = listener.original_url_base

  postprocess(options)
end
54
+
55
+ # Post-process after import.
56
+ #
57
+ # replace-internal-link:: a boolean; when true, internal links are replaced.
58
+ #
59
+ # Returns nothing.
60
def self.postprocess(options)
  # Rewrites links between imported posts to use the `post_url` Liquid tag.
  #
  # Does nothing unless 'replace-internal-link' is truthy and
  # 'original-url-base' is set. Every file matching `_posts/*.*` is read,
  # rewritten in memory and written back.
  #
  # Raises RuntimeError when a linked post has no matching file in `_posts`.
  return unless options.fetch('replace-internal-link', false)

  original_url_base = options.fetch('original-url-base', nil)
  return unless original_url_base

  # Written as a regexp literal so that `\.` stays an escaped dot. Inside a
  # double-quoted string the backslash is dropped, which let "." match any
  # character before "html".
  orig_url_pattern = %r{ href=(["'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^"']+\.html)\1}

  Dir.glob('_posts/*.*') do |filename|
    body = File.open(filename, 'r') do |f|
      f.flock(File::LOCK_SH)
      f.read
    end

    body.gsub!(orig_url_pattern) do
      # Capture the groups up front so later calls cannot disturb them.
      link = Regexp.last_match
      quote = link[1]
      post_glob = "_posts/#{link[2]}-#{link[3]}-*-#{link[4].to_s.tr('/', '-')}"

      post_file = Dir.glob(post_glob).first
      raise "Could not found: #{post_glob}" if post_file.nil?

      " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, '.html')} %}#{quote}"
    end

    File.open(filename, 'w') do |f|
      f.flock(File::LOCK_EX)
      f << body
    end
  end
end
90
+
91
# Stream listener handed to the REXML stream parser. Its behaviour comes from
# two modules mixed into each instance at construction time.
class BloggerAtomStreamListener
  def initialize
    # `extend` is used instead of `include` so the constants are resolved
    # only when an instance is built, i.e. after `require_deps` has run.
    # The order matters: the module extended last takes precedence.
    [REXML::StreamListener, BloggerAtomStreamListenerMethods].each do |mixin|
      extend mixin
    end

    @leave_blogger_info = true
  end
end
100
+
101
+ module BloggerAtomStreamListenerMethods
102
+ attr_accessor :leave_blogger_info
103
+ attr_reader :original_url_base
104
+
105
def tag_start(tag, attrs)
  # Called for every opening tag. Tracks the stack of open tags and, while
  # inside an <entry>, collects metadata from the tag's attributes.
  @tag_bread ||= []
  @tag_bread.push(tag)

  if tag == 'entry'
    raise 'nest entry element' if @in_entry_elem
    return @in_entry_elem = {:meta => {}, :body => nil}
  end

  # Every other tag only matters while an entry is open.
  return unless @in_entry_elem
  meta = @in_entry_elem[:meta]

  case tag
  when 'title'
    raise 'only <title type="text"></title> is supported' if attrs['type'] != 'text'
  when 'category'
    case attrs['scheme']
    when 'http://www.blogger.com/atom/ns#'
      # A label attached to the entry.
      meta[:category] ||= []
      meta[:category] << attrs['term']
    when 'http://schemas.google.com/g/2005#kind'
      # The entry kind with its URL prefix stripped in place.
      kind = attrs['term']
      kind.sub!(Regexp.new("^http://schemas\\.google\\.com/blogger/2008/kind\\#"), '')
      meta[:kind] = kind
    end
  when 'content'
    meta[:content_type] = attrs['type']
  when 'link'
    if attrs['type'] == 'text/html'
      case attrs['rel']
      when 'alternate'
        meta[:original_url] = attrs['href']
      when 'replies'
        # Used only when no original URL has been recorded yet.
        meta[:original_url] = attrs['href'].sub(/\#comment-form$/, '') unless meta[:original_url]
      end
    end
  when 'media:thumbnail'
    meta[:thumbnail] = attrs['url']
  end
end
148
+
149
def text(text)
  # Called for text nodes. While inside an <entry>, stores the text under the
  # field that corresponds to the innermost open tag.
  return unless @in_entry_elem

  meta = @in_entry_elem[:meta]
  current_tag = @tag_bread.last
  enclosing_pair = @tag_bread[-2..-1]

  case current_tag
  when 'id', 'published', 'updated', 'title'
    # These tags map directly to a metadata key of the same name.
    meta[current_tag.to_sym] = text
  when 'content'
    @in_entry_elem[:body] = text
  when 'name'
    # Only <name> directly inside <author> is the author.
    meta[:author] = text if enclosing_pair == %w[author name]
  when 'app:draft'
    meta[:draft] = true if enclosing_pair == %w[app:control app:draft] && text == 'yes'
  end
end
173
+
174
def tag_end(tag)
  # Called for every closing tag. When an <entry> of kind 'post' closes, the
  # collected data is written to `_posts/` (or `_drafts/` for drafts), then
  # the entry state is reset. The closed tag is always popped off the stack.
  #
  # Raises RuntimeError when </entry> arrives while no entry is open.
  if tag == 'entry'
    raise 'nest entry element' unless @in_entry_elem

    post_data = nil
    post_data = get_post_data_from_in_entry_elem_info if @in_entry_elem[:meta][:kind] == 'post'

    if post_data
      target_dir = @in_entry_elem[:meta][:draft] ? '_drafts' : '_posts'
      FileUtils.mkdir_p(target_dir)

      # Percent-decode the file name. `URI.decode` was removed in Ruby 3.0,
      # so the decoding is done here in the same way: only %XX sequences are
      # translated and "+" is left untouched.
      escaped_name = "#{post_data[:filename]}.html"
      encoding = escaped_name.encoding
      encoding = Encoding::UTF_8 if encoding == Encoding::US_ASCII
      file_name = escaped_name.gsub(/%\h{2}/) do |sequence|
        [sequence[1, 2]].pack('H2').force_encoding(encoding)
      end

      File.open(File.join(target_dir, file_name), 'w') do |f|
        f.flock(File::LOCK_EX)

        f << post_data[:header].to_yaml
        f << "---\n\n"
        f << post_data[:body]
      end
    end

    @in_entry_elem = nil
  end

  @tag_bread.pop
end
204
+
205
def get_post_data_from_in_entry_elem_info
  # Builds the file name, front matter and body for the entry currently being
  # collected.
  #
  # Returns nil unless the entry has metadata whose kind is 'post'; otherwise
  # a Hash with :filename, :header and :body.
  # Raises RuntimeError when a non-draft post has no original URL.
  return nil if @in_entry_elem.nil? || !@in_entry_elem.has_key?(:meta)

  meta = @in_entry_elem[:meta]
  return nil unless meta.has_key?(:kind) && meta[:kind] == 'post'

  timestamp = Time.parse(meta[:published]).strftime('%Y-%m-%d')

  filename =
    if meta[:original_url]
      original_uri = URI.parse(meta[:original_url])
      original_path = original_uri.path.to_s
      slug = File.basename(original_path, File.extname(original_path))

      # Remembered so the importer can later rewrite internal links.
      @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
      "#{timestamp}-#{slug}"
    elsif meta[:draft]
      # Drafts don't have published urls
      draft_title = meta[:title]
      if draft_title.nil?
        timestamp
      else
        "#{timestamp}-#{CGI.escape(draft_title.downcase).tr('+','-')}"
      end
    else
      raise 'Original URL is missing'
    end

  header = {
    'layout' => 'post',
    'title'  => meta[:title],
    'date'   => meta[:published],
    'author' => meta[:author],
    'tags'   => meta[:category],
  }
  updated = meta[:updated]
  header['modified_time'] = updated if updated && updated != meta[:published]
  header['thumbnail'] = meta[:thumbnail] if meta[:thumbnail]
  if @leave_blogger_info
    header['blogger_id'] = meta[:id]
    header['blogger_orig_url'] = meta[:original_url] if meta[:original_url]
  end

  body = @in_entry_elem[:body]

  # body escaping associated with liquid (the body string is edited in place)
  body.gsub!(/{{/, '{{ "{{" }}') if body =~ /{{/
  body.gsub!(/{%/, '{{ "{%" }}') if body =~ /{%/

  { :filename => filename, :header => header, :body => body }
end
259
+
260
+ end
261
+
262
+ end
263
+ end
264
+ end