bunto-import 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -21
  3. data/README.markdown +33 -33
  4. data/lib/bunto-import.rb +49 -49
  5. data/lib/bunto-import/importer.rb +26 -26
  6. data/lib/bunto-import/importers.rb +10 -10
  7. data/lib/bunto-import/importers/behance.rb +80 -80
  8. data/lib/bunto-import/importers/blogger.rb +330 -264
  9. data/lib/bunto-import/importers/csv.rb +96 -96
  10. data/lib/bunto-import/importers/drupal6.rb +53 -139
  11. data/lib/bunto-import/importers/drupal7.rb +54 -111
  12. data/lib/bunto-import/importers/drupal_common.rb +157 -0
  13. data/lib/bunto-import/importers/easyblog.rb +96 -96
  14. data/lib/bunto-import/importers/enki.rb +74 -74
  15. data/lib/bunto-import/importers/ghost.rb +68 -68
  16. data/lib/bunto-import/importers/google_reader.rb +64 -64
  17. data/lib/bunto-import/importers/joomla.rb +92 -90
  18. data/lib/bunto-import/importers/joomla3.rb +91 -91
  19. data/lib/bunto-import/importers/jrnl.rb +125 -125
  20. data/lib/bunto-import/importers/marley.rb +72 -72
  21. data/lib/bunto-import/importers/mephisto.rb +99 -99
  22. data/lib/bunto-import/importers/mt.rb +257 -257
  23. data/lib/bunto-import/importers/posterous.rb +130 -130
  24. data/lib/bunto-import/importers/rss.rb +62 -62
  25. data/lib/bunto-import/importers/s9y.rb +60 -60
  26. data/lib/bunto-import/importers/s9y_database.rb +363 -0
  27. data/lib/bunto-import/importers/textpattern.rb +70 -70
  28. data/lib/bunto-import/importers/tumblr.rb +300 -289
  29. data/lib/bunto-import/importers/typo.rb +88 -88
  30. data/lib/bunto-import/importers/wordpress.rb +372 -372
  31. data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
  32. data/lib/bunto-import/util.rb +76 -76
  33. data/lib/bunto-import/version.rb +3 -3
  34. data/lib/bunto/commands/import.rb +79 -79
  35. metadata +84 -54
@@ -1,125 +1,125 @@
1
- module BuntoImport
2
- module Importers
3
- class Jrnl < Importer
4
-
5
- def self.require_deps
6
- BuntoImport.require_with_fallback(%w[
7
- time
8
- rubygems
9
- safe_yaml
10
- ])
11
- end
12
-
13
- def self.specify_options(c)
14
- c.option 'file', '--file FILENAME', 'Journal file (default: "~/journal.txt")'
15
- c.option 'time_format', '--time_format FORMAT', 'Time format of your journal (default: "%Y-%m-%d %H:%M")'
16
- c.option 'extension', '--extension EXT', 'Output extension (default: "md")'
17
- c.option 'layout', '--layout NAME', 'Output post layout (default: "post")'
18
- end
19
-
20
- # Reads a jrnl file and creates a new post for each entry
21
- # The following overrides are available:
22
- # :file path to input file
23
- # :time_format the format used by the jrnl configuration
24
- # :extension the extension format of the output files
25
- # :layout explicitly set the layout of the output
26
- def self.process(options)
27
- file = options.fetch('file', "~/journal.txt")
28
- time_format = options.fetch('time_format', "%Y-%m-%d %H:%M")
29
- extension = options.fetch('extension', "md")
30
- layout = options.fetch('layout', "post")
31
-
32
- date_length = Time.now.strftime(time_format).length
33
-
34
- # convert relative to absolute if needed
35
- file = File.expand_path(file)
36
-
37
- abort "The jrnl file was not found. Please make sure '#{file}' exists. You can specify a different file using the --file switch." unless File.file?(file)
38
-
39
- input = File.read(file)
40
- entries = input.split("\n\n");
41
-
42
- entries.each do |entry|
43
- # split dateline and body
44
- # content[0] has the date and title
45
- # content[1] has the post body
46
- content = entry.split("\n")
47
-
48
- body = get_post_content(content)
49
- date = get_date(content[0], date_length)
50
- title = get_title(content[0], date_length)
51
- slug = create_slug(title)
52
- filename = create_filename(date, slug, extension)
53
- meta = create_meta(layout, title, date) # prepare YAML meta data
54
-
55
- write_file(filename, meta, body) # write to file
56
- end
57
- end
58
-
59
- # strip body from jrnl entry
60
- def self.get_post_content(content)
61
- return content[1]
62
- end
63
-
64
- # strip timestamp from the dateline
65
- def self.get_date(content, offset)
66
- return content[0, offset]
67
- end
68
-
69
- # strip title from the dateline
70
- def self.get_title(content, offset)
71
- return content[offset + 1, content.length]
72
- end
73
-
74
- # generate slug
75
- def self.create_slug(title)
76
- return title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
77
- end
78
-
79
- # generate filename
80
- def self.create_filename(date, slug, extension)
81
- return "#{Time.parse(date).strftime("%Y-%m-%d")}-#{slug}.#{extension}"
82
- end
83
-
84
- # Prepare YAML meta data
85
- #
86
- # layout - name of the layout
87
- # title - title of the entry
88
- # date - date of entry creation
89
- #
90
- # Examples
91
- #
92
- # create_meta("post", "Entry 1", "2013-01-01 13:00")
93
- # # => "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n"
94
- #
95
- # Returns array converted to YAML
96
- def self.create_meta(layout, title, date)
97
- data = {
98
- 'layout' => layout,
99
- 'title' => title,
100
- 'date' => Time.parse(date).strftime("%Y-%m-%d %H:%M %z")
101
- }.to_yaml
102
- return data;
103
- end
104
-
105
- # Writes given data to file
106
- #
107
- # filename - name of the output file
108
- # meta - YAML header data
109
- # body - jrnl entry content
110
- #
111
- # Examples
112
- #
113
- # write_file("2013-01-01-entry-1.md", "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n", "This is the first entry for my new journal")
114
- #
115
- # Writes file to _posts/filename
116
- def self.write_file(filename, meta, body)
117
- File.open("_posts/#{filename}", "w") do |f|
118
- f.puts meta
119
- f.puts "---\n\n"
120
- f.puts body
121
- end
122
- end
123
- end
124
- end
125
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class Jrnl < Importer
4
+
5
+ def self.require_deps
6
+ BuntoImport.require_with_fallback(%w[
7
+ time
8
+ rubygems
9
+ safe_yaml
10
+ ])
11
+ end
12
+
13
+ def self.specify_options(c)
14
+ c.option 'file', '--file FILENAME', 'Journal file (default: "~/journal.txt")'
15
+ c.option 'time_format', '--time_format FORMAT', 'Time format of your journal (default: "%Y-%m-%d %H:%M")'
16
+ c.option 'extension', '--extension EXT', 'Output extension (default: "md")'
17
+ c.option 'layout', '--layout NAME', 'Output post layout (default: "post")'
18
+ end
19
+
20
+ # Reads a jrnl file and creates a new post for each entry
21
+ # The following overrides are available:
22
+ # :file path to input file
23
+ # :time_format the format used by the jrnl configuration
24
+ # :extension the extension format of the output files
25
+ # :layout explicitly set the layout of the output
26
+ def self.process(options)
27
+ file = options.fetch('file', "~/journal.txt")
28
+ time_format = options.fetch('time_format', "%Y-%m-%d %H:%M")
29
+ extension = options.fetch('extension', "md")
30
+ layout = options.fetch('layout', "post")
31
+
32
+ date_length = Time.now.strftime(time_format).length
33
+
34
+ # convert relative to absolute if needed
35
+ file = File.expand_path(file)
36
+
37
+ abort "The jrnl file was not found. Please make sure '#{file}' exists. You can specify a different file using the --file switch." unless File.file?(file)
38
+
39
+ input = File.read(file)
40
+ entries = input.split("\n\n");
41
+
42
+ entries.each do |entry|
43
+ # split dateline and body
44
+ # content[0] has the date and title
45
+ # content[1] has the post body
46
+ content = entry.split("\n")
47
+
48
+ body = get_post_content(content)
49
+ date = get_date(content[0], date_length)
50
+ title = get_title(content[0], date_length)
51
+ slug = create_slug(title)
52
+ filename = create_filename(date, slug, extension)
53
+ meta = create_meta(layout, title, date) # prepare YAML meta data
54
+
55
+ write_file(filename, meta, body) # write to file
56
+ end
57
+ end
58
+
59
+ # strip body from jrnl entry
60
+ def self.get_post_content(content)
61
+ return content[1]
62
+ end
63
+
64
+ # strip timestamp from the dateline
65
+ def self.get_date(content, offset)
66
+ return content[0, offset]
67
+ end
68
+
69
+ # strip title from the dateline
70
+ def self.get_title(content, offset)
71
+ return content[offset + 1, content.length]
72
+ end
73
+
74
+ # generate slug
75
+ def self.create_slug(title)
76
+ return title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
77
+ end
78
+
79
+ # generate filename
80
+ def self.create_filename(date, slug, extension)
81
+ return "#{Time.parse(date).strftime("%Y-%m-%d")}-#{slug}.#{extension}"
82
+ end
83
+
84
+ # Prepare YAML meta data
85
+ #
86
+ # layout - name of the layout
87
+ # title - title of the entry
88
+ # date - date of entry creation
89
+ #
90
+ # Examples
91
+ #
92
+ # create_meta("post", "Entry 1", "2013-01-01 13:00")
93
+ # # => "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n"
94
+ #
95
+ # Returns array converted to YAML
96
+ def self.create_meta(layout, title, date)
97
+ data = {
98
+ 'layout' => layout,
99
+ 'title' => title,
100
+ 'date' => Time.parse(date).strftime("%Y-%m-%d %H:%M %z")
101
+ }.to_yaml
102
+ return data;
103
+ end
104
+
105
+ # Writes given data to file
106
+ #
107
+ # filename - name of the output file
108
+ # meta - YAML header data
109
+ # body - jrnl entry content
110
+ #
111
+ # Examples
112
+ #
113
+ # write_file("2013-01-01-entry-1.md", "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n", "This is the first entry for my new journal")
114
+ #
115
+ # Writes file to _posts/filename
116
+ def self.write_file(filename, meta, body)
117
+ File.open("_posts/#{filename}", "w") do |f|
118
+ f.puts meta
119
+ f.puts "---\n\n"
120
+ f.puts body
121
+ end
122
+ end
123
+ end
124
+ end
125
+ end
@@ -1,72 +1,72 @@
1
- module BuntoImport
2
- module Importers
3
- class Marley < Importer
4
- def self.validate(options)
5
- if options['marley_data_dir'].nil?
6
- Bunto.logger.abort_with "Missing mandatory option --marley_data_dir."
7
- else
8
- unless File.directory?(options['marley_data_dir'])
9
- raise ArgumentError, "marley dir '#{options['marley_data_dir']}' not found"
10
- end
11
- end
12
- end
13
-
14
- def self.regexp
15
- { :id => /^\d{0,4}-{0,1}(.*)$/,
16
- :title => /^#\s*(.*)\s+$/,
17
- :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
18
- :published_on => /.*\s+\(([0-9\/]+)\)$/,
19
- :perex => /^([^\#\n]+\n)$/,
20
- :meta => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
21
- }
22
- end
23
-
24
- def self.require_deps
25
- BuntoImport.require_with_fallback(%w[
26
- fileutils
27
- safe_yaml
28
- ])
29
- end
30
-
31
- def self.specify_options(c)
32
- c.option 'marley_data_dir', '--marley_data_dir DIR', 'The dir containing your marley data'
33
- end
34
-
35
- def self.process(options)
36
- marley_data_dir = options.fetch('marley_data_dir')
37
-
38
- FileUtils.mkdir_p "_posts"
39
-
40
- posts = 0
41
- Dir["#{marley_data_dir}/**/*.txt"].each do |f|
42
- next unless File.exists?(f)
43
-
44
- #copied over from marley's app/lib/post.rb
45
- file_content = File.read(f)
46
- meta_content = file_content.slice!( self.regexp[:meta] )
47
- body = file_content.sub( self.regexp[:title], '').sub( self.regexp[:perex], '').strip
48
-
49
- title = file_content.scan( self.regexp[:title] ).first.to_s.strip
50
- prerex = file_content.scan( self.regexp[:perex] ).first.to_s.strip
51
- published_on = DateTime.parse( post[:published_on] ) rescue File.mtime( File.dirname(f) )
52
- meta = ( meta_content ) ? YAML::load( meta_content.scan( self.regexp[:meta]).to_s ) : {}
53
- meta['title'] = title
54
- meta['layout'] = 'post'
55
-
56
- formatted_date = published_on.strftime('%Y-%m-%d')
57
- post_name = File.dirname(f).split(%r{/}).last.gsub(/\A\d+-/, '')
58
-
59
- name = "#{formatted_date}-#{post_name}"
60
- File.open("_posts/#{name}.markdown", "w") do |f|
61
- f.puts meta.to_yaml
62
- f.puts "---\n"
63
- f.puts "\n#{prerex}\n\n" if prerex
64
- f.puts body
65
- end
66
- posts += 1
67
- end
68
- "Created #{posts} posts!"
69
- end
70
- end
71
- end
72
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class Marley < Importer
4
+ def self.validate(options)
5
+ if options['marley_data_dir'].nil?
6
+ Bunto.logger.abort_with "Missing mandatory option --marley_data_dir."
7
+ else
8
+ unless File.directory?(options['marley_data_dir'])
9
+ raise ArgumentError, "marley dir '#{options['marley_data_dir']}' not found"
10
+ end
11
+ end
12
+ end
13
+
14
+ def self.regexp
15
+ { :id => /^\d{0,4}-{0,1}(.*)$/,
16
+ :title => /^#\s*(.*)\s+$/,
17
+ :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
18
+ :published_on => /.*\s+\(([0-9\/]+)\)$/,
19
+ :perex => /^([^\#\n]+\n)$/,
20
+ :meta => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
21
+ }
22
+ end
23
+
24
+ def self.require_deps
25
+ BuntoImport.require_with_fallback(%w[
26
+ fileutils
27
+ safe_yaml
28
+ ])
29
+ end
30
+
31
+ def self.specify_options(c)
32
+ c.option 'marley_data_dir', '--marley_data_dir DIR', 'The dir containing your marley data'
33
+ end
34
+
35
+ def self.process(options)
36
+ marley_data_dir = options.fetch('marley_data_dir')
37
+
38
+ FileUtils.mkdir_p "_posts"
39
+
40
+ posts = 0
41
+ Dir["#{marley_data_dir}/**/*.txt"].each do |f|
42
+ next unless File.exists?(f)
43
+
44
+ #copied over from marley's app/lib/post.rb
45
+ file_content = File.read(f)
46
+ meta_content = file_content.slice!( self.regexp[:meta] )
47
+ body = file_content.sub( self.regexp[:title], '').sub( self.regexp[:perex], '').strip
48
+
49
+ title = file_content.scan( self.regexp[:title] ).first.to_s.strip
50
+ prerex = file_content.scan( self.regexp[:perex] ).first.to_s.strip
51
+ published_on = DateTime.parse( post[:published_on] ) rescue File.mtime( File.dirname(f) )
52
+ meta = ( meta_content ) ? YAML::load( meta_content.scan( self.regexp[:meta]).to_s ) : {}
53
+ meta['title'] = title
54
+ meta['layout'] = 'post'
55
+
56
+ formatted_date = published_on.strftime('%Y-%m-%d')
57
+ post_name = File.dirname(f).split(%r{/}).last.gsub(/\A\d+-/, '')
58
+
59
+ name = "#{formatted_date}-#{post_name}"
60
+ File.open("_posts/#{name}.markdown", "w") do |f|
61
+ f.puts meta.to_yaml
62
+ f.puts "---\n"
63
+ f.puts "\n#{prerex}\n\n" if prerex
64
+ f.puts body
65
+ end
66
+ posts += 1
67
+ end
68
+ "Created #{posts} posts!"
69
+ end
70
+ end
71
+ end
72
+ end
@@ -1,99 +1,99 @@
1
- module BuntoImport
2
- module Importers
3
- class Mephisto < Importer
4
- #Accepts a hash with database config variables, exports mephisto posts into a csv
5
- #export PGPASSWORD if you must
6
- def self.postgres(c)
7
- sql = <<-SQL
8
- BEGIN;
9
- CREATE TEMP TABLE bunto AS
10
- SELECT title, permalink, body, published_at, filter FROM contents
11
- WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
12
- COPY bunto TO STDOUT WITH CSV HEADER;
13
- ROLLBACK;
14
- SQL
15
- command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{c[:filename] || "posts.csv"})
16
- puts command
17
- `#{command}`
18
- CSV.process
19
- end
20
-
21
- def self.validate(options)
22
- %w[dbname user].each do |option|
23
- if options[option].nil?
24
- abort "Missing mandatory option --#{option}."
25
- end
26
- end
27
- end
28
-
29
- def self.require_deps
30
- BuntoImport.require_with_fallback(%w[
31
- rubygems
32
- sequel
33
- fastercsv
34
- fileutils
35
- ])
36
- end
37
-
38
- def self.specify_options(c)
39
- c.option 'dbname', '--dbname DB', 'Database name'
40
- c.option 'user', '--user USER', 'Database user name'
41
- c.option 'password', '--password PW', "Database user's password (default: '')"
42
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
43
- end
44
-
45
- # This query will pull blog posts from all entries across all blogs. If
46
- # you've got unpublished, deleted or otherwise hidden posts please sift
47
- # through the created posts to make sure nothing is accidently published.
48
- QUERY = "SELECT id, \
49
- permalink, \
50
- body, \
51
- published_at, \
52
- title \
53
- FROM contents \
54
- WHERE user_id = 1 AND \
55
- type = 'Article' AND \
56
- published_at IS NOT NULL \
57
- ORDER BY published_at"
58
-
59
- def self.process(options)
60
- dbname = options.fetch('dbname')
61
- user = options.fetch('user')
62
- pass = options.fetch('password', '')
63
- host = options.fetch('host', "localhost")
64
-
65
- db = Sequel.mysql(dbname, :user => user,
66
- :password => pass,
67
- :host => host,
68
- :encoding => 'utf8')
69
-
70
- FileUtils.mkdir_p "_posts"
71
-
72
- db[QUERY].each do |post|
73
- title = post[:title]
74
- slug = post[:permalink]
75
- date = post[:published_at]
76
- content = post[:body]
77
-
78
- # Ideally, this script would determine the post format (markdown,
79
- # html, etc) and create files with proper extensions. At this point
80
- # it just assumes that markdown will be acceptable.
81
- name = [date.year, date.month, date.day, slug].join('-') + ".markdown"
82
-
83
- data = {
84
- 'layout' => 'post',
85
- 'title' => title.to_s,
86
- 'mt_id' => post[:entry_id],
87
- }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
88
-
89
- File.open("_posts/#{name}", "w") do |f|
90
- f.puts data
91
- f.puts "---"
92
- f.puts content
93
- end
94
- end
95
-
96
- end
97
- end
98
- end
99
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class Mephisto < Importer
4
+ #Accepts a hash with database config variables, exports mephisto posts into a csv
5
+ #export PGPASSWORD if you must
6
+ def self.postgres(c)
7
+ sql = <<-SQL
8
+ BEGIN;
9
+ CREATE TEMP TABLE bunto AS
10
+ SELECT title, permalink, body, published_at, filter FROM contents
11
+ WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
12
+ COPY bunto TO STDOUT WITH CSV HEADER;
13
+ ROLLBACK;
14
+ SQL
15
+ command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{c[:filename] || "posts.csv"})
16
+ puts command
17
+ `#{command}`
18
+ CSV.process
19
+ end
20
+
21
+ def self.validate(options)
22
+ %w[dbname user].each do |option|
23
+ if options[option].nil?
24
+ abort "Missing mandatory option --#{option}."
25
+ end
26
+ end
27
+ end
28
+
29
+ def self.require_deps
30
+ BuntoImport.require_with_fallback(%w[
31
+ rubygems
32
+ sequel
33
+ fastercsv
34
+ fileutils
35
+ ])
36
+ end
37
+
38
+ def self.specify_options(c)
39
+ c.option 'dbname', '--dbname DB', 'Database name'
40
+ c.option 'user', '--user USER', 'Database user name'
41
+ c.option 'password', '--password PW', "Database user's password (default: '')"
42
+ c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
43
+ end
44
+
45
+ # This query will pull blog posts from all entries across all blogs. If
46
+ # you've got unpublished, deleted or otherwise hidden posts please sift
47
+ # through the created posts to make sure nothing is accidently published.
48
+ QUERY = "SELECT id, \
49
+ permalink, \
50
+ body, \
51
+ published_at, \
52
+ title \
53
+ FROM contents \
54
+ WHERE user_id = 1 AND \
55
+ type = 'Article' AND \
56
+ published_at IS NOT NULL \
57
+ ORDER BY published_at"
58
+
59
+ def self.process(options)
60
+ dbname = options.fetch('dbname')
61
+ user = options.fetch('user')
62
+ pass = options.fetch('password', '')
63
+ host = options.fetch('host', "localhost")
64
+
65
+ db = Sequel.mysql(dbname, :user => user,
66
+ :password => pass,
67
+ :host => host,
68
+ :encoding => 'utf8')
69
+
70
+ FileUtils.mkdir_p "_posts"
71
+
72
+ db[QUERY].each do |post|
73
+ title = post[:title]
74
+ slug = post[:permalink]
75
+ date = post[:published_at]
76
+ content = post[:body]
77
+
78
+ # Ideally, this script would determine the post format (markdown,
79
+ # html, etc) and create files with proper extensions. At this point
80
+ # it just assumes that markdown will be acceptable.
81
+ name = [date.year, date.month, date.day, slug].join('-') + ".markdown"
82
+
83
+ data = {
84
+ 'layout' => 'post',
85
+ 'title' => title.to_s,
86
+ 'mt_id' => post[:entry_id],
87
+ }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
88
+
89
+ File.open("_posts/#{name}", "w") do |f|
90
+ f.puts data
91
+ f.puts "---"
92
+ f.puts content
93
+ end
94
+ end
95
+
96
+ end
97
+ end
98
+ end
99
+ end