bunto-import 2.0.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -21
  3. data/README.markdown +33 -33
  4. data/lib/bunto-import.rb +49 -49
  5. data/lib/bunto-import/importer.rb +26 -26
  6. data/lib/bunto-import/importers.rb +10 -10
  7. data/lib/bunto-import/importers/behance.rb +80 -80
  8. data/lib/bunto-import/importers/blogger.rb +330 -264
  9. data/lib/bunto-import/importers/csv.rb +96 -96
  10. data/lib/bunto-import/importers/drupal6.rb +53 -139
  11. data/lib/bunto-import/importers/drupal7.rb +54 -111
  12. data/lib/bunto-import/importers/drupal_common.rb +157 -0
  13. data/lib/bunto-import/importers/easyblog.rb +96 -96
  14. data/lib/bunto-import/importers/enki.rb +74 -74
  15. data/lib/bunto-import/importers/ghost.rb +68 -68
  16. data/lib/bunto-import/importers/google_reader.rb +64 -64
  17. data/lib/bunto-import/importers/joomla.rb +92 -90
  18. data/lib/bunto-import/importers/joomla3.rb +91 -91
  19. data/lib/bunto-import/importers/jrnl.rb +125 -125
  20. data/lib/bunto-import/importers/marley.rb +72 -72
  21. data/lib/bunto-import/importers/mephisto.rb +99 -99
  22. data/lib/bunto-import/importers/mt.rb +257 -257
  23. data/lib/bunto-import/importers/posterous.rb +130 -130
  24. data/lib/bunto-import/importers/rss.rb +62 -62
  25. data/lib/bunto-import/importers/s9y.rb +60 -60
  26. data/lib/bunto-import/importers/s9y_database.rb +363 -0
  27. data/lib/bunto-import/importers/textpattern.rb +70 -70
  28. data/lib/bunto-import/importers/tumblr.rb +300 -289
  29. data/lib/bunto-import/importers/typo.rb +88 -88
  30. data/lib/bunto-import/importers/wordpress.rb +372 -372
  31. data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
  32. data/lib/bunto-import/util.rb +76 -76
  33. data/lib/bunto-import/version.rb +3 -3
  34. data/lib/bunto/commands/import.rb +79 -79
  35. metadata +84 -54
@@ -1,125 +1,125 @@
1
- module BuntoImport
2
- module Importers
3
- class Jrnl < Importer
4
-
5
- def self.require_deps
6
- BuntoImport.require_with_fallback(%w[
7
- time
8
- rubygems
9
- safe_yaml
10
- ])
11
- end
12
-
13
- def self.specify_options(c)
14
- c.option 'file', '--file FILENAME', 'Journal file (default: "~/journal.txt")'
15
- c.option 'time_format', '--time_format FORMAT', 'Time format of your journal (default: "%Y-%m-%d %H:%M")'
16
- c.option 'extension', '--extension EXT', 'Output extension (default: "md")'
17
- c.option 'layout', '--layout NAME', 'Output post layout (default: "post")'
18
- end
19
-
20
- # Reads a jrnl file and creates a new post for each entry
21
- # The following overrides are available:
22
- # :file path to input file
23
- # :time_format the format used by the jrnl configuration
24
- # :extension the extension format of the output files
25
- # :layout explicitly set the layout of the output
26
- def self.process(options)
27
- file = options.fetch('file', "~/journal.txt")
28
- time_format = options.fetch('time_format', "%Y-%m-%d %H:%M")
29
- extension = options.fetch('extension', "md")
30
- layout = options.fetch('layout', "post")
31
-
32
- date_length = Time.now.strftime(time_format).length
33
-
34
- # convert relative to absolute if needed
35
- file = File.expand_path(file)
36
-
37
- abort "The jrnl file was not found. Please make sure '#{file}' exists. You can specify a different file using the --file switch." unless File.file?(file)
38
-
39
- input = File.read(file)
40
- entries = input.split("\n\n");
41
-
42
- entries.each do |entry|
43
- # split dateline and body
44
- # content[0] has the date and title
45
- # content[1] has the post body
46
- content = entry.split("\n")
47
-
48
- body = get_post_content(content)
49
- date = get_date(content[0], date_length)
50
- title = get_title(content[0], date_length)
51
- slug = create_slug(title)
52
- filename = create_filename(date, slug, extension)
53
- meta = create_meta(layout, title, date) # prepare YAML meta data
54
-
55
- write_file(filename, meta, body) # write to file
56
- end
57
- end
58
-
59
- # strip body from jrnl entry
60
- def self.get_post_content(content)
61
- return content[1]
62
- end
63
-
64
- # strip timestamp from the dateline
65
- def self.get_date(content, offset)
66
- return content[0, offset]
67
- end
68
-
69
- # strip title from the dateline
70
- def self.get_title(content, offset)
71
- return content[offset + 1, content.length]
72
- end
73
-
74
- # generate slug
75
- def self.create_slug(title)
76
- return title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
77
- end
78
-
79
- # generate filename
80
- def self.create_filename(date, slug, extension)
81
- return "#{Time.parse(date).strftime("%Y-%m-%d")}-#{slug}.#{extension}"
82
- end
83
-
84
- # Prepare YAML meta data
85
- #
86
- # layout - name of the layout
87
- # title - title of the entry
88
- # date - date of entry creation
89
- #
90
- # Examples
91
- #
92
- # create_meta("post", "Entry 1", "2013-01-01 13:00")
93
- # # => "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n"
94
- #
95
- # Returns array converted to YAML
96
- def self.create_meta(layout, title, date)
97
- data = {
98
- 'layout' => layout,
99
- 'title' => title,
100
- 'date' => Time.parse(date).strftime("%Y-%m-%d %H:%M %z")
101
- }.to_yaml
102
- return data;
103
- end
104
-
105
- # Writes given data to file
106
- #
107
- # filename - name of the output file
108
- # meta - YAML header data
109
- # body - jrnl entry content
110
- #
111
- # Examples
112
- #
113
- # write_file("2013-01-01-entry-1.md", "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n", "This is the first entry for my new journal")
114
- #
115
- # Writes file to _posts/filename
116
- def self.write_file(filename, meta, body)
117
- File.open("_posts/#{filename}", "w") do |f|
118
- f.puts meta
119
- f.puts "---\n\n"
120
- f.puts body
121
- end
122
- end
123
- end
124
- end
125
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class Jrnl < Importer
4
+
5
+ def self.require_deps
6
+ BuntoImport.require_with_fallback(%w[
7
+ time
8
+ rubygems
9
+ safe_yaml
10
+ ])
11
+ end
12
+
13
+ def self.specify_options(c)
14
+ c.option 'file', '--file FILENAME', 'Journal file (default: "~/journal.txt")'
15
+ c.option 'time_format', '--time_format FORMAT', 'Time format of your journal (default: "%Y-%m-%d %H:%M")'
16
+ c.option 'extension', '--extension EXT', 'Output extension (default: "md")'
17
+ c.option 'layout', '--layout NAME', 'Output post layout (default: "post")'
18
+ end
19
+
20
+ # Reads a jrnl file and creates a new post for each entry
21
+ # The following overrides are available:
22
+ # :file path to input file
23
+ # :time_format the format used by the jrnl configuration
24
+ # :extension the extension format of the output files
25
+ # :layout explicitly set the layout of the output
26
+ def self.process(options)
27
+ file = options.fetch('file', "~/journal.txt")
28
+ time_format = options.fetch('time_format', "%Y-%m-%d %H:%M")
29
+ extension = options.fetch('extension', "md")
30
+ layout = options.fetch('layout', "post")
31
+
32
+ date_length = Time.now.strftime(time_format).length
33
+
34
+ # convert relative to absolute if needed
35
+ file = File.expand_path(file)
36
+
37
+ abort "The jrnl file was not found. Please make sure '#{file}' exists. You can specify a different file using the --file switch." unless File.file?(file)
38
+
39
+ input = File.read(file)
40
+ entries = input.split("\n\n");
41
+
42
+ entries.each do |entry|
43
+ # split dateline and body
44
+ # content[0] has the date and title
45
+ # content[1] has the post body
46
+ content = entry.split("\n")
47
+
48
+ body = get_post_content(content)
49
+ date = get_date(content[0], date_length)
50
+ title = get_title(content[0], date_length)
51
+ slug = create_slug(title)
52
+ filename = create_filename(date, slug, extension)
53
+ meta = create_meta(layout, title, date) # prepare YAML meta data
54
+
55
+ write_file(filename, meta, body) # write to file
56
+ end
57
+ end
58
+
59
+ # strip body from jrnl entry
60
+ def self.get_post_content(content)
61
+ return content[1]
62
+ end
63
+
64
+ # strip timestamp from the dateline
65
+ def self.get_date(content, offset)
66
+ return content[0, offset]
67
+ end
68
+
69
+ # strip title from the dateline
70
+ def self.get_title(content, offset)
71
+ return content[offset + 1, content.length]
72
+ end
73
+
74
+ # generate slug
75
+ def self.create_slug(title)
76
+ return title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
77
+ end
78
+
79
+ # generate filename
80
+ def self.create_filename(date, slug, extension)
81
+ return "#{Time.parse(date).strftime("%Y-%m-%d")}-#{slug}.#{extension}"
82
+ end
83
+
84
+ # Prepare YAML meta data
85
+ #
86
+ # layout - name of the layout
87
+ # title - title of the entry
88
+ # date - date of entry creation
89
+ #
90
+ # Examples
91
+ #
92
+ # create_meta("post", "Entry 1", "2013-01-01 13:00")
93
+ # # => "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n"
94
+ #
95
+ # Returns array converted to YAML
96
+ def self.create_meta(layout, title, date)
97
+ data = {
98
+ 'layout' => layout,
99
+ 'title' => title,
100
+ 'date' => Time.parse(date).strftime("%Y-%m-%d %H:%M %z")
101
+ }.to_yaml
102
+ return data;
103
+ end
104
+
105
+ # Writes given data to file
106
+ #
107
+ # filename - name of the output file
108
+ # meta - YAML header data
109
+ # body - jrnl entry content
110
+ #
111
+ # Examples
112
+ #
113
+ # write_file("2013-01-01-entry-1.md", "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n", "This is the first entry for my new journal")
114
+ #
115
+ # Writes file to _posts/filename
116
+ def self.write_file(filename, meta, body)
117
+ File.open("_posts/#{filename}", "w") do |f|
118
+ f.puts meta
119
+ f.puts "---\n\n"
120
+ f.puts body
121
+ end
122
+ end
123
+ end
124
+ end
125
+ end
@@ -1,72 +1,72 @@
1
- module BuntoImport
2
- module Importers
3
- class Marley < Importer
4
- def self.validate(options)
5
- if options['marley_data_dir'].nil?
6
- Bunto.logger.abort_with "Missing mandatory option --marley_data_dir."
7
- else
8
- unless File.directory?(options['marley_data_dir'])
9
- raise ArgumentError, "marley dir '#{options['marley_data_dir']}' not found"
10
- end
11
- end
12
- end
13
-
14
- def self.regexp
15
- { :id => /^\d{0,4}-{0,1}(.*)$/,
16
- :title => /^#\s*(.*)\s+$/,
17
- :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
18
- :published_on => /.*\s+\(([0-9\/]+)\)$/,
19
- :perex => /^([^\#\n]+\n)$/,
20
- :meta => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
21
- }
22
- end
23
-
24
- def self.require_deps
25
- BuntoImport.require_with_fallback(%w[
26
- fileutils
27
- safe_yaml
28
- ])
29
- end
30
-
31
- def self.specify_options(c)
32
- c.option 'marley_data_dir', '--marley_data_dir DIR', 'The dir containing your marley data'
33
- end
34
-
35
- def self.process(options)
36
- marley_data_dir = options.fetch('marley_data_dir')
37
-
38
- FileUtils.mkdir_p "_posts"
39
-
40
- posts = 0
41
- Dir["#{marley_data_dir}/**/*.txt"].each do |f|
42
- next unless File.exists?(f)
43
-
44
- #copied over from marley's app/lib/post.rb
45
- file_content = File.read(f)
46
- meta_content = file_content.slice!( self.regexp[:meta] )
47
- body = file_content.sub( self.regexp[:title], '').sub( self.regexp[:perex], '').strip
48
-
49
- title = file_content.scan( self.regexp[:title] ).first.to_s.strip
50
- prerex = file_content.scan( self.regexp[:perex] ).first.to_s.strip
51
- published_on = DateTime.parse( post[:published_on] ) rescue File.mtime( File.dirname(f) )
52
- meta = ( meta_content ) ? YAML::load( meta_content.scan( self.regexp[:meta]).to_s ) : {}
53
- meta['title'] = title
54
- meta['layout'] = 'post'
55
-
56
- formatted_date = published_on.strftime('%Y-%m-%d')
57
- post_name = File.dirname(f).split(%r{/}).last.gsub(/\A\d+-/, '')
58
-
59
- name = "#{formatted_date}-#{post_name}"
60
- File.open("_posts/#{name}.markdown", "w") do |f|
61
- f.puts meta.to_yaml
62
- f.puts "---\n"
63
- f.puts "\n#{prerex}\n\n" if prerex
64
- f.puts body
65
- end
66
- posts += 1
67
- end
68
- "Created #{posts} posts!"
69
- end
70
- end
71
- end
72
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class Marley < Importer
4
+ def self.validate(options)
5
+ if options['marley_data_dir'].nil?
6
+ Bunto.logger.abort_with "Missing mandatory option --marley_data_dir."
7
+ else
8
+ unless File.directory?(options['marley_data_dir'])
9
+ raise ArgumentError, "marley dir '#{options['marley_data_dir']}' not found"
10
+ end
11
+ end
12
+ end
13
+
14
+ def self.regexp
15
+ { :id => /^\d{0,4}-{0,1}(.*)$/,
16
+ :title => /^#\s*(.*)\s+$/,
17
+ :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
18
+ :published_on => /.*\s+\(([0-9\/]+)\)$/,
19
+ :perex => /^([^\#\n]+\n)$/,
20
+ :meta => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
21
+ }
22
+ end
23
+
24
+ def self.require_deps
25
+ BuntoImport.require_with_fallback(%w[
26
+ fileutils
27
+ safe_yaml
28
+ ])
29
+ end
30
+
31
+ def self.specify_options(c)
32
+ c.option 'marley_data_dir', '--marley_data_dir DIR', 'The dir containing your marley data'
33
+ end
34
+
35
+ def self.process(options)
36
+ marley_data_dir = options.fetch('marley_data_dir')
37
+
38
+ FileUtils.mkdir_p "_posts"
39
+
40
+ posts = 0
41
+ Dir["#{marley_data_dir}/**/*.txt"].each do |f|
42
+ next unless File.exists?(f)
43
+
44
+ #copied over from marley's app/lib/post.rb
45
+ file_content = File.read(f)
46
+ meta_content = file_content.slice!( self.regexp[:meta] )
47
+ body = file_content.sub( self.regexp[:title], '').sub( self.regexp[:perex], '').strip
48
+
49
+ title = file_content.scan( self.regexp[:title] ).first.to_s.strip
50
+ prerex = file_content.scan( self.regexp[:perex] ).first.to_s.strip
51
+ published_on = DateTime.parse( post[:published_on] ) rescue File.mtime( File.dirname(f) )
52
+ meta = ( meta_content ) ? YAML::load( meta_content.scan( self.regexp[:meta]).to_s ) : {}
53
+ meta['title'] = title
54
+ meta['layout'] = 'post'
55
+
56
+ formatted_date = published_on.strftime('%Y-%m-%d')
57
+ post_name = File.dirname(f).split(%r{/}).last.gsub(/\A\d+-/, '')
58
+
59
+ name = "#{formatted_date}-#{post_name}"
60
+ File.open("_posts/#{name}.markdown", "w") do |f|
61
+ f.puts meta.to_yaml
62
+ f.puts "---\n"
63
+ f.puts "\n#{prerex}\n\n" if prerex
64
+ f.puts body
65
+ end
66
+ posts += 1
67
+ end
68
+ "Created #{posts} posts!"
69
+ end
70
+ end
71
+ end
72
+ end
@@ -1,99 +1,99 @@
1
- module BuntoImport
2
- module Importers
3
- class Mephisto < Importer
4
- #Accepts a hash with database config variables, exports mephisto posts into a csv
5
- #export PGPASSWORD if you must
6
- def self.postgres(c)
7
- sql = <<-SQL
8
- BEGIN;
9
- CREATE TEMP TABLE bunto AS
10
- SELECT title, permalink, body, published_at, filter FROM contents
11
- WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
12
- COPY bunto TO STDOUT WITH CSV HEADER;
13
- ROLLBACK;
14
- SQL
15
- command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{c[:filename] || "posts.csv"})
16
- puts command
17
- `#{command}`
18
- CSV.process
19
- end
20
-
21
- def self.validate(options)
22
- %w[dbname user].each do |option|
23
- if options[option].nil?
24
- abort "Missing mandatory option --#{option}."
25
- end
26
- end
27
- end
28
-
29
- def self.require_deps
30
- BuntoImport.require_with_fallback(%w[
31
- rubygems
32
- sequel
33
- fastercsv
34
- fileutils
35
- ])
36
- end
37
-
38
- def self.specify_options(c)
39
- c.option 'dbname', '--dbname DB', 'Database name'
40
- c.option 'user', '--user USER', 'Database user name'
41
- c.option 'password', '--password PW', "Database user's password (default: '')"
42
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
43
- end
44
-
45
- # This query will pull blog posts from all entries across all blogs. If
46
- # you've got unpublished, deleted or otherwise hidden posts please sift
47
- # through the created posts to make sure nothing is accidently published.
48
- QUERY = "SELECT id, \
49
- permalink, \
50
- body, \
51
- published_at, \
52
- title \
53
- FROM contents \
54
- WHERE user_id = 1 AND \
55
- type = 'Article' AND \
56
- published_at IS NOT NULL \
57
- ORDER BY published_at"
58
-
59
- def self.process(options)
60
- dbname = options.fetch('dbname')
61
- user = options.fetch('user')
62
- pass = options.fetch('password', '')
63
- host = options.fetch('host', "localhost")
64
-
65
- db = Sequel.mysql(dbname, :user => user,
66
- :password => pass,
67
- :host => host,
68
- :encoding => 'utf8')
69
-
70
- FileUtils.mkdir_p "_posts"
71
-
72
- db[QUERY].each do |post|
73
- title = post[:title]
74
- slug = post[:permalink]
75
- date = post[:published_at]
76
- content = post[:body]
77
-
78
- # Ideally, this script would determine the post format (markdown,
79
- # html, etc) and create files with proper extensions. At this point
80
- # it just assumes that markdown will be acceptable.
81
- name = [date.year, date.month, date.day, slug].join('-') + ".markdown"
82
-
83
- data = {
84
- 'layout' => 'post',
85
- 'title' => title.to_s,
86
- 'mt_id' => post[:entry_id],
87
- }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
88
-
89
- File.open("_posts/#{name}", "w") do |f|
90
- f.puts data
91
- f.puts "---"
92
- f.puts content
93
- end
94
- end
95
-
96
- end
97
- end
98
- end
99
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class Mephisto < Importer
4
+ #Accepts a hash with database config variables, exports mephisto posts into a csv
5
+ #export PGPASSWORD if you must
6
+ def self.postgres(c)
7
+ sql = <<-SQL
8
+ BEGIN;
9
+ CREATE TEMP TABLE bunto AS
10
+ SELECT title, permalink, body, published_at, filter FROM contents
11
+ WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
12
+ COPY bunto TO STDOUT WITH CSV HEADER;
13
+ ROLLBACK;
14
+ SQL
15
+ command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{c[:filename] || "posts.csv"})
16
+ puts command
17
+ `#{command}`
18
+ CSV.process
19
+ end
20
+
21
+ def self.validate(options)
22
+ %w[dbname user].each do |option|
23
+ if options[option].nil?
24
+ abort "Missing mandatory option --#{option}."
25
+ end
26
+ end
27
+ end
28
+
29
+ def self.require_deps
30
+ BuntoImport.require_with_fallback(%w[
31
+ rubygems
32
+ sequel
33
+ fastercsv
34
+ fileutils
35
+ ])
36
+ end
37
+
38
+ def self.specify_options(c)
39
+ c.option 'dbname', '--dbname DB', 'Database name'
40
+ c.option 'user', '--user USER', 'Database user name'
41
+ c.option 'password', '--password PW', "Database user's password (default: '')"
42
+ c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
43
+ end
44
+
45
+ # This query will pull blog posts from all entries across all blogs. If
46
+ # you've got unpublished, deleted or otherwise hidden posts please sift
47
+ # through the created posts to make sure nothing is accidently published.
48
+ QUERY = "SELECT id, \
49
+ permalink, \
50
+ body, \
51
+ published_at, \
52
+ title \
53
+ FROM contents \
54
+ WHERE user_id = 1 AND \
55
+ type = 'Article' AND \
56
+ published_at IS NOT NULL \
57
+ ORDER BY published_at"
58
+
59
+ def self.process(options)
60
+ dbname = options.fetch('dbname')
61
+ user = options.fetch('user')
62
+ pass = options.fetch('password', '')
63
+ host = options.fetch('host', "localhost")
64
+
65
+ db = Sequel.mysql(dbname, :user => user,
66
+ :password => pass,
67
+ :host => host,
68
+ :encoding => 'utf8')
69
+
70
+ FileUtils.mkdir_p "_posts"
71
+
72
+ db[QUERY].each do |post|
73
+ title = post[:title]
74
+ slug = post[:permalink]
75
+ date = post[:published_at]
76
+ content = post[:body]
77
+
78
+ # Ideally, this script would determine the post format (markdown,
79
+ # html, etc) and create files with proper extensions. At this point
80
+ # it just assumes that markdown will be acceptable.
81
+ name = [date.year, date.month, date.day, slug].join('-') + ".markdown"
82
+
83
+ data = {
84
+ 'layout' => 'post',
85
+ 'title' => title.to_s,
86
+ 'mt_id' => post[:entry_id],
87
+ }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
88
+
89
+ File.open("_posts/#{name}", "w") do |f|
90
+ f.puts data
91
+ f.puts "---"
92
+ f.puts content
93
+ end
94
+ end
95
+
96
+ end
97
+ end
98
+ end
99
+ end