bunto-import 2.0.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -21
  3. data/README.markdown +33 -33
  4. data/lib/bunto-import.rb +49 -49
  5. data/lib/bunto-import/importer.rb +26 -26
  6. data/lib/bunto-import/importers.rb +10 -10
  7. data/lib/bunto-import/importers/behance.rb +80 -80
  8. data/lib/bunto-import/importers/blogger.rb +330 -264
  9. data/lib/bunto-import/importers/csv.rb +96 -96
  10. data/lib/bunto-import/importers/drupal6.rb +53 -139
  11. data/lib/bunto-import/importers/drupal7.rb +54 -111
  12. data/lib/bunto-import/importers/drupal_common.rb +157 -0
  13. data/lib/bunto-import/importers/easyblog.rb +96 -96
  14. data/lib/bunto-import/importers/enki.rb +74 -74
  15. data/lib/bunto-import/importers/ghost.rb +68 -68
  16. data/lib/bunto-import/importers/google_reader.rb +64 -64
  17. data/lib/bunto-import/importers/joomla.rb +92 -90
  18. data/lib/bunto-import/importers/joomla3.rb +91 -91
  19. data/lib/bunto-import/importers/jrnl.rb +125 -125
  20. data/lib/bunto-import/importers/marley.rb +72 -72
  21. data/lib/bunto-import/importers/mephisto.rb +99 -99
  22. data/lib/bunto-import/importers/mt.rb +257 -257
  23. data/lib/bunto-import/importers/posterous.rb +130 -130
  24. data/lib/bunto-import/importers/rss.rb +62 -62
  25. data/lib/bunto-import/importers/s9y.rb +60 -60
  26. data/lib/bunto-import/importers/s9y_database.rb +363 -0
  27. data/lib/bunto-import/importers/textpattern.rb +70 -70
  28. data/lib/bunto-import/importers/tumblr.rb +300 -289
  29. data/lib/bunto-import/importers/typo.rb +88 -88
  30. data/lib/bunto-import/importers/wordpress.rb +372 -372
  31. data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
  32. data/lib/bunto-import/util.rb +76 -76
  33. data/lib/bunto-import/version.rb +3 -3
  34. data/lib/bunto/commands/import.rb +79 -79
  35. metadata +84 -54
@@ -1,96 +1,96 @@
1
- # encoding: UTF-8
2
-
3
- module BuntoImport
4
- module Importers
5
- class CSV < Importer
6
- def self.require_deps
7
- BuntoImport.require_with_fallback(%w[
8
- csv
9
- fileutils
10
- yaml
11
- ])
12
- end
13
-
14
- def self.specify_options(c)
15
- c.option 'file', '--file NAME', 'The CSV file to import (default: "posts.csv")'
16
- c.option 'no-front-matter', '--no-front-matter', 'Do not add the default front matter to the post body'
17
- end
18
-
19
- # Reads a csv with title, permalink, body, published_at, and filter.
20
- # It creates a post file for each row in the csv
21
- def self.process(options)
22
- file = options.fetch('file', "posts.csv")
23
-
24
- FileUtils.mkdir_p "_posts"
25
- posts = 0
26
- abort "Cannot find the file '#{file}'. Aborting." unless File.file?(file)
27
-
28
- ::CSV.foreach(file) do |row|
29
- next if row[0] == "title" # header
30
- posts += 1
31
- write_post(CSVPost.new(row), options)
32
- end
33
- Bunto.logger.info "Created #{posts} posts!"
34
- end
35
-
36
- class CSVPost
37
- attr_reader :title, :permalink, :body, :markup
38
-
39
- MissingDataError = Class.new(RuntimeError)
40
-
41
- # Creates a CSVPost
42
- #
43
- # row - Array of data, length of 4 or 5 with the columns:
44
- #
45
- # 1. title
46
- # 2. permalink
47
- # 3. body
48
- # 4. published_at
49
- # 5. markup (markdown, textile)
50
- def initialize(row)
51
- @title = row[0] || missing_data("Post title not present in first column.")
52
- @permalink = row[1] || missing_data("Post permalink not present in second column.")
53
- @body = row[2] || missing_data("Post body not present in third column.")
54
- @published_at = row[3] || missing_data("Post publish date not present in fourth column.")
55
- @markup = row[4] || "markdown"
56
- end
57
-
58
- def published_at
59
- if @published_at && !@published_at.is_a?(DateTime)
60
- @published_at = DateTime.parse(@published_at)
61
- else
62
- @published_at
63
- end
64
- end
65
-
66
- def filename
67
- "#{published_at.strftime("%Y-%m-%d")}-#{File.basename(permalink, ".*")}.#{markup}"
68
- end
69
-
70
- def missing_data(message)
71
- raise MissingDataError, message
72
- end
73
- end
74
-
75
- def self.write_post(post, options = {})
76
- File.open(File.join("_posts", post.filename), "w") do |f|
77
- write_frontmatter(f, post, options)
78
- f.puts post.body
79
- end
80
- end
81
-
82
- def self.write_frontmatter(f, post, options)
83
- no_frontmatter = options.fetch('no-front-matter', false)
84
- unless no_frontmatter
85
- f.puts YAML.dump({
86
- "layout" => "post",
87
- "title" => post.title,
88
- "date" => post.published_at.to_s,
89
- "permalink" => post.permalink
90
- })
91
- f.puts "---"
92
- end
93
- end
94
- end
95
- end
96
- end
1
+ # encoding: UTF-8
2
+
3
+ module BuntoImport
4
+ module Importers
5
+ class CSV < Importer
6
+ def self.require_deps
7
+ BuntoImport.require_with_fallback(%w[
8
+ csv
9
+ fileutils
10
+ yaml
11
+ ])
12
+ end
13
+
14
+ def self.specify_options(c)
15
+ c.option 'file', '--file NAME', 'The CSV file to import (default: "posts.csv")'
16
+ c.option 'no-front-matter', '--no-front-matter', 'Do not add the default front matter to the post body'
17
+ end
18
+
19
+ # Reads a csv with title, permalink, body, published_at, and filter.
20
+ # It creates a post file for each row in the csv
21
+ def self.process(options)
22
+ file = options.fetch('file', "posts.csv")
23
+
24
+ FileUtils.mkdir_p "_posts"
25
+ posts = 0
26
+ abort "Cannot find the file '#{file}'. Aborting." unless File.file?(file)
27
+
28
+ ::CSV.foreach(file) do |row|
29
+ next if row[0] == "title" # header
30
+ posts += 1
31
+ write_post(CSVPost.new(row), options)
32
+ end
33
+ Bunto.logger.info "Created #{posts} posts!"
34
+ end
35
+
36
+ class CSVPost
37
+ attr_reader :title, :permalink, :body, :markup
38
+
39
+ MissingDataError = Class.new(RuntimeError)
40
+
41
+ # Creates a CSVPost
42
+ #
43
+ # row - Array of data, length of 4 or 5 with the columns:
44
+ #
45
+ # 1. title
46
+ # 2. permalink
47
+ # 3. body
48
+ # 4. published_at
49
+ # 5. markup (markdown, textile)
50
+ def initialize(row)
51
+ @title = row[0] || missing_data("Post title not present in first column.")
52
+ @permalink = row[1] || missing_data("Post permalink not present in second column.")
53
+ @body = row[2] || missing_data("Post body not present in third column.")
54
+ @published_at = row[3] || missing_data("Post publish date not present in fourth column.")
55
+ @markup = row[4] || "markdown"
56
+ end
57
+
58
+ def published_at
59
+ if @published_at && !@published_at.is_a?(DateTime)
60
+ @published_at = DateTime.parse(@published_at)
61
+ else
62
+ @published_at
63
+ end
64
+ end
65
+
66
+ def filename
67
+ "#{published_at.strftime("%Y-%m-%d")}-#{File.basename(permalink, ".*")}.#{markup}"
68
+ end
69
+
70
+ def missing_data(message)
71
+ raise MissingDataError, message
72
+ end
73
+ end
74
+
75
+ def self.write_post(post, options = {})
76
+ File.open(File.join("_posts", post.filename), "w") do |f|
77
+ write_frontmatter(f, post, options)
78
+ f.puts post.body
79
+ end
80
+ end
81
+
82
+ def self.write_frontmatter(f, post, options)
83
+ no_frontmatter = options.fetch('no-front-matter', false)
84
+ unless no_frontmatter
85
+ f.puts YAML.dump({
86
+ "layout" => "post",
87
+ "title" => post.title,
88
+ "date" => post.published_at.to_s,
89
+ "permalink" => post.permalink
90
+ })
91
+ f.puts "---"
92
+ end
93
+ end
94
+ end
95
+ end
96
+ end
@@ -1,139 +1,53 @@
1
- module BuntoImport
2
- module Importers
3
- class Drupal6 < Importer
4
- # Reads a MySQL database via Sequel and creates a post file for each story
5
- # and blog node.
6
- QUERY = "SELECT n.nid, \
7
- n.title, \
8
- nr.body, \
9
- n.created, \
10
- n.status, \
11
- GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags' \
12
- FROM node_revisions AS nr, \
13
- node AS n \
14
- LEFT OUTER JOIN term_node AS tn ON tn.nid = n.nid \
15
- LEFT OUTER JOIN term_data AS td ON tn.tid = td.tid \
16
- WHERE (%types%) \
17
- AND n.vid = nr.vid \
18
- GROUP BY n.nid"
19
-
20
- def self.validate(options)
21
- %w[dbname user].each do |option|
22
- if options[option].nil?
23
- abort "Missing mandatory option --#{option}."
24
- end
25
- end
26
- end
27
-
28
- def self.specify_options(c)
29
- c.option 'dbname', '--dbname DB', 'Database name'
30
- c.option 'user', '--user USER', 'Database user name'
31
- c.option 'password', '--password PW', "Database user's password (default: '')"
32
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
33
- c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
34
- c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
35
- end
36
-
37
- def self.require_deps
38
- BuntoImport.require_with_fallback(%w[
39
- rubygems
40
- sequel
41
- fileutils
42
- safe_yaml
43
- mysql
44
- ])
45
- end
46
-
47
- def self.process(options)
48
- dbname = options.fetch('dbname')
49
- user = options.fetch('user')
50
- pass = options.fetch('password', "")
51
- host = options.fetch('host', "localhost")
52
- prefix = options.fetch('prefix', "")
53
- types = options.fetch('types', ['blog', 'story', 'article'])
54
-
55
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
56
-
57
- if prefix != ''
58
- QUERY[" node "] = " " + prefix + "node "
59
- QUERY[" node_revisions "] = " " + prefix + "node_revisions "
60
- QUERY[" term_node "] = " " + prefix + "term_node "
61
- QUERY[" term_data "] = " " + prefix + "term_data "
62
- end
63
-
64
- types = types.join("' OR n.type = '")
65
- QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
66
-
67
- FileUtils.mkdir_p "_posts"
68
- FileUtils.mkdir_p "_drafts"
69
- FileUtils.mkdir_p "_layouts"
70
-
71
- # Create the refresh layout
72
- # Change the refresh url if you customized your permalink config
73
- File.open("_layouts/refresh.html", "w") do |f|
74
- f.puts <<EOF
75
- <!DOCTYPE html>
76
- <html>
77
- <head>
78
- <meta http-equiv="content-type" content="text/html; charset=utf-8" />
79
- <meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
80
- </head>
81
- </html>
82
- EOF
83
- end
84
-
85
- db[QUERY].each do |post|
86
- # Get required fields and construct Bunto compatible name
87
- node_id = post[:nid]
88
- title = post[:title]
89
- content = post[:body]
90
- tags = (post[:tags] || '').downcase.strip
91
- created = post[:created]
92
- time = Time.at(created)
93
- is_published = post[:status] == 1
94
- dir = is_published ? "_posts" : "_drafts"
95
- slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
96
- name = time.strftime("%Y-%m-%d-") + slug + '.md'
97
-
98
- # Get the relevant fields as a hash, delete empty fields and convert
99
- # to YAML for the header
100
- data = {
101
- 'layout' => 'post',
102
- 'title' => title.to_s,
103
- 'created' => created,
104
- 'categories' => tags.split('|')
105
- }.delete_if { |k,v| v.nil? || v == ''}.each_pair {
106
- |k,v| ((v.is_a? String) ? v.force_encoding("UTF-8") : v)
107
- }.to_yaml
108
-
109
- # Write out the data and content to file
110
- File.open("#{dir}/#{name}", "w") do |f|
111
- f.puts data
112
- f.puts "---"
113
- f.puts content
114
- end
115
-
116
- # Make a file to redirect from the old Drupal URL
117
- if is_published
118
- aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all
119
-
120
- aliases.push(:dst => "node/#{node_id}")
121
-
122
- aliases.each do |url_alias|
123
- FileUtils.mkdir_p url_alias[:dst]
124
- File.open("#{url_alias[:dst]}/index.md", "w") do |f|
125
- f.puts "---"
126
- f.puts "layout: refresh"
127
- f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
128
- f.puts "---"
129
- end
130
- end
131
- end
132
- end
133
-
134
- # TODO: Make dirs & files for nodes of type 'page'
135
- # Make refresh pages for these as well
136
- end
137
- end
138
- end
139
- end
1
+ require 'bunto-import/importers/drupal_common'
2
+
3
+ module BuntoImport
4
+ module Importers
5
+ class Drupal6 < Importer
6
+ include DrupalCommon
7
+ extend DrupalCommon::ClassMethods
8
+
9
+ def self.build_query(prefix, types)
10
+ types = types.join("' OR n.type = '")
11
+ types = "n.type = '#{types}'"
12
+
13
+ query = <<EOS
14
+ SELECT n.nid,
15
+ n.title,
16
+ nr.body,
17
+ nr.teaser,
18
+ n.created,
19
+ n.status,
20
+ n.type,
21
+ GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
22
+ FROM #{prefix}node_revisions AS nr,
23
+ #{prefix}node AS n
24
+ LEFT OUTER JOIN #{prefix}term_node AS tn ON tn.nid = n.nid
25
+ LEFT OUTER JOIN #{prefix}term_data AS td ON tn.tid = td.tid
26
+ WHERE (#{types})
27
+ AND n.vid = nr.vid
28
+ GROUP BY n.nid
29
+ EOS
30
+
31
+ return query
32
+ end
33
+
34
+ def self.aliases_query(prefix)
35
+ "SELECT src AS source, dst AS alias FROM #{prefix}url_alias WHERE src = ?"
36
+ end
37
+
38
+ def self.post_data(sql_post_data)
39
+ content = sql_post_data[:body].to_s
40
+ summary = sql_post_data[:teaser].to_s
41
+ tags = (sql_post_data[:tags] || '').downcase.strip
42
+
43
+ data = {
44
+ 'excerpt' => summary,
45
+ 'categories' => tags.split('|')
46
+ }
47
+
48
+ return data, content
49
+ end
50
+
51
+ end
52
+ end
53
+ end
@@ -1,111 +1,54 @@
1
- module BuntoImport
2
- module Importers
3
- class Drupal7 < Importer
4
- # Reads a MySQL database via Sequel and creates a post file for each story
5
- # and blog node.
6
- QUERY = "SELECT n.title, \
7
- fdb.body_value, \
8
- fdb.body_summary, \
9
- n.created, \
10
- n.status, \
11
- n.nid, \
12
- u.name \
13
- FROM node AS n, \
14
- field_data_body AS fdb, \
15
- users AS u \
16
- WHERE (%types%) \
17
- AND n.nid = fdb.entity_id \
18
- AND n.vid = fdb.revision_id
19
- AND n.uid = u.uid"
20
-
21
- def self.validate(options)
22
- %w[dbname user].each do |option|
23
- if options[option].nil?
24
- abort "Missing mandatory option --#{option}."
25
- end
26
- end
27
- end
28
-
29
- def self.specify_options(c)
30
- c.option 'dbname', '--dbname DB', 'Database name'
31
- c.option 'user', '--user USER', 'Database user name'
32
- c.option 'password', '--password PW', 'Database user\'s password (default: "")'
33
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
34
- c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
35
- c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
36
- end
37
-
38
- def self.require_deps
39
- BuntoImport.require_with_fallback(%w[
40
- rubygems
41
- sequel
42
- fileutils
43
- safe_yaml
44
- ])
45
- end
46
-
47
- def self.process(options)
48
- dbname = options.fetch('dbname')
49
- user = options.fetch('user')
50
- pass = options.fetch('password', "")
51
- host = options.fetch('host', "localhost")
52
- prefix = options.fetch('prefix', "")
53
- types = options.fetch('types', ['blog', 'story', 'article'])
54
-
55
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
56
-
57
- unless prefix.empty?
58
- QUERY[" node "] = " " + prefix + "node "
59
- QUERY[" field_data_body "] = " " + prefix + "field_data_body "
60
- QUERY[" users "] = " " + prefix + "users "
61
- end
62
-
63
- types = types.join("' OR n.type = '")
64
- QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
65
-
66
- FileUtils.mkdir_p "_posts"
67
- FileUtils.mkdir_p "_drafts"
68
- FileUtils.mkdir_p "_layouts"
69
-
70
- db[QUERY].each do |post|
71
- # Get required fields and construct Bunto compatible name
72
- title = post[:title]
73
- content = post[:body_value]
74
- summary = post[:body_summary]
75
- created = post[:created]
76
- author = post[:name]
77
- nid = post[:nid]
78
- time = Time.at(created)
79
- is_published = post[:status] == 1
80
- dir = is_published ? "_posts" : "_drafts"
81
- slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
82
- name = time.strftime("%Y-%m-%d-") + slug + '.md'
83
-
84
- # Get the relevant fields as a hash, delete empty fields and convert
85
- # to YAML for the header
86
- data = {
87
- 'layout' => 'post',
88
- 'title' => title.strip.force_encoding("UTF-8"),
89
- 'author' => author,
90
- 'nid' => nid,
91
- 'created' => created,
92
- 'excerpt' => summary
93
- }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
94
-
95
- # Write out the data and content to file
96
- File.open("#{dir}/#{name}", "w") do |f|
97
- f.puts data
98
- f.puts "---"
99
- f.puts content
100
- end
101
-
102
- end
103
-
104
- # TODO: Make dirs & files for nodes of type 'page'
105
- # Make refresh pages for these as well
106
-
107
- # TODO: Make refresh dirs & files according to entries in url_alias table
108
- end
109
- end
110
- end
111
- end
1
+ require 'bunto-import/importers/drupal_common'
2
+
3
+ module BuntoImport
4
+ module Importers
5
+ class Drupal7 < Importer
6
+ include DrupalCommon
7
+ extend DrupalCommon::ClassMethods
8
+
9
+ def self.build_query(prefix, types)
10
+ types = types.join("' OR n.type = '")
11
+ types = "n.type = '#{types}'"
12
+
13
+ query = <<EOS
14
+ SELECT n.nid,
15
+ n.title,
16
+ fdb.body_value,
17
+ fdb.body_summary,
18
+ n.created,
19
+ n.status,
20
+ n.type,
21
+ GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
22
+ FROM #{prefix}field_data_body AS fdb,
23
+ #{prefix}node AS n
24
+ LEFT OUTER JOIN #{prefix}taxonomy_index AS ti ON ti.nid = n.nid
25
+ LEFT OUTER JOIN #{prefix}taxonomy_term_data AS td ON ti.tid = td.tid
26
+ WHERE (#{types})
27
+ AND n.nid = fdb.entity_id
28
+ AND n.vid = fdb.revision_id
29
+ GROUP BY n.nid"
30
+ EOS
31
+
32
+ return query
33
+ end
34
+
35
+ def self.aliases_query(prefix)
36
+ "SELECT source, alias FROM #{prefix}url_alias WHERE source = ?"
37
+ end
38
+
39
+ def self.post_data(sql_post_data)
40
+ content = sql_post_data[:body_value].to_s
41
+ summary = sql_post_data[:body_summary].to_s
42
+ tags = (sql_post_data[:tags] || '').downcase.strip
43
+
44
+ data = {
45
+ 'excerpt' => summary,
46
+ 'categories' => tags.split('|')
47
+ }
48
+
49
+ return data, content
50
+ end
51
+
52
+ end
53
+ end
54
+ end