bunto-import 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,96 @@
1
+ # encoding: UTF-8
2
+
3
+ module BuntoImport
4
+ module Importers
5
+ class CSV < Importer
6
# Loads the libraries the CSV importer relies on.
#
# Hands the names csv, fileutils and yaml to
# BuntoImport.require_with_fallback and returns whatever that call returns.
def self.require_deps
  libraries = %w[csv fileutils yaml]
  BuntoImport.require_with_fallback(libraries)
end
13
+
14
# Registers the command-line options understood by the CSV importer.
#
# c - The command object; it only needs to respond to #option.
#
# Returns the result of the last c.option call.
def self.specify_options(c)
  c.option('file', '--file NAME', 'The CSV file to import (default: "posts.csv")')
  c.option('no-front-matter', '--no-front-matter', 'Do not add the default front matter to the post body')
end
18
+
19
# Reads a csv with title, permalink, body, published_at, and filter.
# It creates a post file for each row in the csv.
#
# options - Hash of importer options:
#           'file' - path of the CSV file (default: "posts.csv").
#           The whole hash is also forwarded to write_post.
#
# Aborts (SystemExit) when the file does not exist. The check runs before
# anything is written, so a failed run does not leave an empty "_posts"
# directory behind.
def self.process(options)
  file = options.fetch('file', "posts.csv")
  abort "Cannot find the file '#{file}'. Aborting." unless File.file?(file)

  FileUtils.mkdir_p "_posts"
  posts = 0

  ::CSV.foreach(file) do |row|
    next if row[0] == "title" # header
    posts += 1
    write_post(CSVPost.new(row), options)
  end
  Bunto.logger.info "Created #{posts} posts!"
end
35
+
36
# Wraps one CSV row and exposes the pieces needed to write a post file.
class CSVPost
  # Raised when one of the four mandatory columns is absent.
  class MissingDataError < RuntimeError; end

  attr_reader :title, :permalink, :body, :markup

  # Creates a CSVPost
  #
  # row - Array of data, length of 4 or 5 with the columns:
  #
  #   1. title
  #   2. permalink
  #   3. body
  #   4. published_at
  #   5. markup (markdown, textile); "markdown" when absent
  #
  # Raises MissingDataError for the first mandatory column that is missing.
  def initialize(row)
    @title        = required_column(row, 0, "Post title not present in first column.")
    @permalink    = required_column(row, 1, "Post permalink not present in second column.")
    @body         = required_column(row, 2, "Post body not present in third column.")
    @published_at = required_column(row, 3, "Post publish date not present in fourth column.")
    @markup       = row[4] || "markdown"
  end

  # Returns the publish date, parsing the raw column value into a DateTime
  # on first access and keeping the parsed value afterwards.
  def published_at
    return @published_at if !@published_at || @published_at.is_a?(DateTime)

    @published_at = DateTime.parse(@published_at)
  end

  # Returns the post file name: "<YYYY-MM-DD>-<permalink basename>.<markup>".
  def filename
    date_prefix = published_at.strftime("%Y-%m-%d")
    slug = File.basename(permalink, ".*")
    "#{date_prefix}-#{slug}.#{markup}"
  end

  # Raises MissingDataError with the given message.
  def missing_data(message)
    raise MissingDataError, message
  end

  private

  # Returns row[index], or raises MissingDataError with message when the
  # value is nil or false.
  def required_column(row, index, message)
    row[index] || missing_data(message)
  end
end
74
+
75
# Writes one post into the "_posts" directory.
#
# post    - Object responding to #filename and #body.
# options - Hash passed through to write_frontmatter (default: {}).
#
# The file receives whatever write_frontmatter emits, followed by the body.
def self.write_post(post, options = {})
  target = File.join("_posts", post.filename)
  File.open(target, "w") do |io|
    write_frontmatter(io, post, options)
    io.puts post.body
  end
end
81
+
82
# Emits the YAML front matter for a post, followed by a "---" line.
#
# f       - IO-like object responding to #puts.
# post    - Object responding to #title, #published_at and #permalink.
# options - Hash; a truthy 'no-front-matter' entry suppresses all output.
def self.write_frontmatter(f, post, options)
  return if options.fetch('no-front-matter', false)

  front_matter = {
    "layout" => "post",
    "title" => post.title,
    "date" => post.published_at.to_s,
    "permalink" => post.permalink
  }
  f.puts YAML.dump(front_matter)
  f.puts "---"
end
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,139 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Drupal6 < Importer
4
# SQL template used by process to read nodes together with their
# '|'-separated term names. process substitutes the " WHERE (%types%) "
# placeholder and, when a table prefix is given, the space-delimited table
# names (" node ", " node_revisions ", " term_node ", " term_data "), so
# those substrings must stay surrounded by single spaces.
QUERY = "SELECT n.nid, " \
        "n.title, " \
        "nr.body, " \
        "n.created, " \
        "n.status, " \
        "GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags' " \
        "FROM node_revisions AS nr, " \
        "node AS n " \
        "LEFT OUTER JOIN term_node AS tn ON tn.nid = n.nid " \
        "LEFT OUTER JOIN term_data AS td ON tn.tid = td.tid " \
        "WHERE (%types%) " \
        "AND n.vid = nr.vid " \
        "GROUP BY n.nid"
19
+
20
# Aborts unless both mandatory connection options were supplied.
#
# options - Hash of importer options keyed by option name.
#
# Returns the list of checked option names when nothing is missing;
# otherwise terminates via abort (SystemExit) naming the first missing one.
def self.validate(options)
  %w[dbname user].each do |option|
    next unless options[option].nil?

    abort "Missing mandatory option --#{option}."
  end
end
27
+
28
# Registers the command-line options understood by the Drupal 6 importer.
#
# c - The command object; it only needs to respond to #option.
#
# Returns the result of the last c.option call.
def self.specify_options(c)
  c.option('dbname', '--dbname DB', 'Database name')
  c.option('user', '--user USER', 'Database user name')
  c.option('password', '--password PW', "Database user's password (default: '')")
  c.option('host', '--host HOST', 'Database host name (default: "localhost")')
  c.option('prefix', '--prefix PREFIX', 'Table prefix name')
  c.option('types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.')
end
36
+
37
# Loads the libraries the Drupal 6 importer relies on.
#
# Hands the names rubygems, sequel, fileutils, safe_yaml and mysql to
# BuntoImport.require_with_fallback and returns whatever that call returns.
def self.require_deps
  libraries = %w[rubygems sequel fileutils safe_yaml mysql]
  BuntoImport.require_with_fallback(libraries)
end
46
+
47
# Imports Drupal 6 nodes from a MySQL database (via Sequel), writing one
# Markdown file per node plus redirect stubs for the old Drupal URLs.
#
# options - Hash of importer options:
#           'dbname'   - database name (required).
#           'user'     - database user (required).
#           'password' - database password (default: "").
#           'host'     - database host (default: "localhost").
#           'prefix'   - table name prefix (default: "").
#           'types'    - Array of node types (default: blog, story, article).
#
# Nodes whose status is 1 go to "_posts", all others to "_drafts". For each
# node with status 1, an index.md using the "refresh" layout is written
# under every url_alias destination and under "node/<nid>".
#
# Raises KeyError when 'dbname' or 'user' is missing.
def self.process(options)
  dbname = options.fetch('dbname')
  user   = options.fetch('user')
  pass   = options.fetch('password', "")
  host   = options.fetch('host', "localhost")
  prefix = options.fetch('prefix', "")
  types  = options.fetch('types', ['blog', 'story', 'article'])

  db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

  # Work on a private copy of the template. Substituting into the QUERY
  # constant itself would remove its placeholders, so a second call in the
  # same process would raise IndexError and inherit the previous prefix
  # and types.
  query = QUERY.dup

  unless prefix.empty?
    %w[node node_revisions term_node term_data].each do |table|
      query[" #{table} "] = " #{prefix}#{table} "
    end
  end

  # NOTE: prefix and types are interpolated into the SQL unescaped; they
  # are expected to come from the person running the import.
  type_filter = types.join("' OR n.type = '")
  query[" WHERE (%types%) "] = " WHERE (n.type = '#{type_filter}') "

  FileUtils.mkdir_p "_posts"
  FileUtils.mkdir_p "_drafts"
  FileUtils.mkdir_p "_layouts"

  # Create the refresh layout
  # Change the refresh url if you customized your permalink config
  File.open("_layouts/refresh.html", "w") do |f|
    f.puts <<EOF
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
</head>
</html>
EOF
  end

  db[query].each do |post|
    # Get required fields and construct Bunto compatible name
    node_id = post[:nid]
    title = post[:title]
    content = post[:body]
    tags = (post[:tags] || '').downcase.strip
    created = post[:created]
    time = Time.at(created)
    is_published = post[:status] == 1
    dir = is_published ? "_posts" : "_drafts"
    slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
    name = time.strftime("%Y-%m-%d-") + slug + '.md'

    # Get the relevant fields as a hash, delete empty fields, tag every
    # string value as UTF-8 and convert to YAML for the header
    data = {
      'layout' => 'post',
      'title' => title.to_s,
      'created' => created,
      'categories' => tags.split('|')
    }.delete_if { |_key, value| value.nil? || value == '' }.each_value { |value|
      value.force_encoding("UTF-8") if value.is_a?(String)
    }.to_yaml

    # Write out the data and content to file
    File.open("#{dir}/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts content
    end

    # Make a file to redirect from the old Drupal URL
    next unless is_published

    aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all

    aliases.push(:dst => "node/#{node_id}")

    aliases.each do |url_alias|
      FileUtils.mkdir_p url_alias[:dst]
      File.open("#{url_alias[:dst]}/index.md", "w") do |f|
        f.puts "---"
        f.puts "layout: refresh"
        f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
        f.puts "---"
      end
    end
  end

  # TODO: Make dirs & files for nodes of type 'page'
  # Make refresh pages for these as well
end
137
+ end
138
+ end
139
+ end
@@ -0,0 +1,111 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Drupal7 < Importer
4
# SQL template used by process to read nodes with their body field and
# author name. process substitutes the " WHERE (%types%) " placeholder and,
# when a table prefix is given, the space-delimited table names (" node ",
# " field_data_body ", " users "), so those substrings must stay surrounded
# by single spaces. Every fragment is joined on one line; the original had a
# stray newline after "fdb.revision_id" from a missing line continuation.
QUERY = "SELECT n.title, " \
        "fdb.body_value, " \
        "fdb.body_summary, " \
        "n.created, " \
        "n.status, " \
        "n.nid, " \
        "u.name " \
        "FROM node AS n, " \
        "field_data_body AS fdb, " \
        "users AS u " \
        "WHERE (%types%) " \
        "AND n.nid = fdb.entity_id " \
        "AND n.vid = fdb.revision_id " \
        "AND n.uid = u.uid"
20
+
21
# Aborts unless both mandatory connection options were supplied.
#
# options - Hash of importer options keyed by option name.
#
# Returns the list of checked option names when nothing is missing;
# otherwise terminates via abort (SystemExit) naming the first missing one.
def self.validate(options)
  %w[dbname user].each do |option|
    next unless options[option].nil?

    abort "Missing mandatory option --#{option}."
  end
end
28
+
29
# Registers the command-line options understood by the Drupal 7 importer.
#
# c - The command object; it only needs to respond to #option.
#
# Returns the result of the last c.option call.
def self.specify_options(c)
  c.option('dbname', '--dbname DB', 'Database name')
  c.option('user', '--user USER', 'Database user name')
  c.option('password', '--password PW', 'Database user\'s password (default: "")')
  c.option('host', '--host HOST', 'Database host name (default: "localhost")')
  c.option('prefix', '--prefix PREFIX', 'Table prefix name')
  c.option('types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.')
end
37
+
38
# Loads the libraries the Drupal 7 importer relies on.
#
# Hands the names rubygems, sequel, fileutils and safe_yaml to
# BuntoImport.require_with_fallback and returns whatever that call returns.
def self.require_deps
  libraries = %w[rubygems sequel fileutils safe_yaml]
  BuntoImport.require_with_fallback(libraries)
end
46
+
47
# Imports Drupal 7 nodes from a MySQL database (via Sequel), writing one
# Markdown file per node.
#
# options - Hash of importer options:
#           'dbname'   - database name (required).
#           'user'     - database user (required).
#           'password' - database password (default: "").
#           'host'     - database host (default: "localhost").
#           'prefix'   - table name prefix (default: "").
#           'types'    - Array of node types (default: blog, story, article).
#
# Nodes whose status is 1 go to "_posts", all others to "_drafts".
#
# Raises KeyError when 'dbname' or 'user' is missing.
def self.process(options)
  dbname = options.fetch('dbname')
  user   = options.fetch('user')
  pass   = options.fetch('password', "")
  host   = options.fetch('host', "localhost")
  prefix = options.fetch('prefix', "")
  types  = options.fetch('types', ['blog', 'story', 'article'])

  db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

  # Work on a private copy of the template. Substituting into the QUERY
  # constant itself would remove its placeholders, so a second call in the
  # same process would raise IndexError and inherit the previous prefix
  # and types.
  query = QUERY.dup

  unless prefix.empty?
    %w[node field_data_body users].each do |table|
      query[" #{table} "] = " #{prefix}#{table} "
    end
  end

  # NOTE: prefix and types are interpolated into the SQL unescaped; they
  # are expected to come from the person running the import.
  type_filter = types.join("' OR n.type = '")
  query[" WHERE (%types%) "] = " WHERE (n.type = '#{type_filter}') "

  FileUtils.mkdir_p "_posts"
  FileUtils.mkdir_p "_drafts"
  FileUtils.mkdir_p "_layouts"

  db[query].each do |post|
    # Get required fields and construct Bunto compatible name
    title = post[:title]
    content = post[:body_value]
    summary = post[:body_summary]
    created = post[:created]
    author = post[:name]
    nid = post[:nid]
    time = Time.at(created)
    is_published = post[:status] == 1
    dir = is_published ? "_posts" : "_drafts"
    slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
    name = time.strftime("%Y-%m-%d-") + slug + '.md'

    # Get the relevant fields as a hash, delete empty fields and convert
    # to YAML for the header
    data = {
      'layout' => 'post',
      'title' => title.strip.force_encoding("UTF-8"),
      'author' => author,
      'nid' => nid,
      'created' => created,
      'excerpt' => summary
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    # Write out the data and content to file
    File.open("#{dir}/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts content
    end
  end

  # TODO: Make dirs & files for nodes of type 'page'
  # Make refresh pages for these as well

  # TODO: Make refresh dirs & files according to entries in url_alias table
end
109
+ end
110
+ end
111
+ end
@@ -0,0 +1,96 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Easyblog < Importer
4
# Aborts unless both mandatory connection options were supplied.
#
# options - Hash of importer options keyed by option name.
#
# Returns the list of checked option names when nothing is missing;
# otherwise terminates via abort (SystemExit) naming the first missing one.
def self.validate(options)
  %w[dbname user].each do |option|
    next unless options[option].nil?

    abort "Missing mandatory option --#{option}."
  end
end
11
+
12
# Registers the command-line options understood by the Easyblog importer.
#
# Each switch carries an argument placeholder (as the Drupal importers in
# this package do) so the option parser treats it as taking a value rather
# than as a boolean flag. Defaults quoted in the descriptions are the ones
# process falls back to.
#
# c - The command object; it only needs to respond to #option.
#
# Returns the result of the last c.option call.
def self.specify_options(c)
  c.option 'dbname', '--dbname DB', 'Database name'
  c.option 'user', '--user USER', 'Database user name'
  c.option 'password', '--password PW', "Database user's password (default: '')"
  c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
  c.option 'section', '--section SECTION', 'Section ID (default: "1"; currently not used by the import)'
  c.option 'prefix', '--prefix PREFIX', 'Table prefix name (default: "jos_")'
end
20
+
21
# Loads the libraries the Easyblog importer relies on.
#
# Hands the names rubygems, sequel, fileutils and safe_yaml to
# BuntoImport.require_with_fallback and returns whatever that call returns.
def self.require_deps
  libraries = %w[rubygems sequel fileutils safe_yaml]
  BuntoImport.require_with_fallback(libraries)
end
29
+
30
# Imports EasyBlog posts from a MySQL database (via Sequel), writing one
# "_posts/<YYYY-MM-DD>-<alias>.markdown" file per row.
#
# options - Hash of importer options:
#           'dbname'   - database name (required).
#           'user'     - database user (required).
#           'password' - database password (default: '').
#           'host'     - database host (default: "localhost").
#           'prefix'   - table name prefix (default: "jos_").
#           'section'  - accepted for compatibility; the query below does
#                        not use it.
#
# Raises KeyError when 'dbname' or 'user' is missing.
def self.process(options)
  dbname = options.fetch('dbname')
  user = options.fetch('user')
  pass = options.fetch('password', '')
  host = options.fetch('host', "localhost")
  table_prefix = options.fetch('prefix', "jos_")

  db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

  FileUtils.mkdir_p("_posts")

  # Selects every row of <prefix>easyblog_post together with its category
  # title and a space-separated list of its tag aliases.
  # NOTE: table_prefix is interpolated unescaped; it is expected to come
  # from the person running the import.
  query = "
    select
      ep.`title`, `permalink` as alias, concat(`intro`, `content`) as content, ep.`created`, ep.`id`, ec.`title` as category, tags
    from
      #{table_prefix}easyblog_post ep
      left join #{table_prefix}easyblog_category ec on (ep.category_id = ec.id)
      left join (
        select
          ept.post_id,
          group_concat(et.alias order by alias separator ' ') as tags
        from
          #{table_prefix}easyblog_post_tag ept
          join #{table_prefix}easyblog_tag et on (ept.tag_id = et.id)
        group by
          ept.post_id) x on (ep.id = x.post_id);
  "

  db[query].each do |post|
    # Get required fields and construct Bunto compatible name.
    title = post[:title]
    slug = post[:alias]
    date = post[:created]
    content = post[:content]
    name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
                                           slug]

    # Get the relevant fields as a hash, delete empty fields and convert
    # to YAML for the header.
    data = {
      'layout' => 'post',
      'title' => title.to_s,
      'joomla_id' => post[:id],
      'joomla_url' => post[:alias],
      'category' => post[:category],
      'tags' => post[:tags],
      'date' => date
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    # Write out the data and content to file
    File.open("_posts/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts content
    end
  end
end
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,74 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Enki < Importer
4
# Query run by process: one row per record in the posts table, exposing
# published_at as :date and cached_tag_list as :tags.
SQL = <<-EOS
  SELECT p.id,
         p.title,
         p.slug,
         p.body,
         p.published_at as date,
         p.cached_tag_list as tags
  FROM posts p
EOS
13
+
14
# Aborts unless both mandatory connection options were supplied.
#
# options - Hash of importer options keyed by option name.
#
# Returns the list of checked option names when nothing is missing;
# otherwise terminates via abort (SystemExit) naming the first missing one.
def self.validate(options)
  %w[dbname user].each do |option|
    next unless options[option].nil?

    abort "Missing mandatory option --#{option}."
  end
end
21
+
22
# Registers the command-line options understood by the Enki importer.
#
# Each switch carries an argument placeholder (as the Drupal importers in
# this package do) so the option parser treats it as taking a value rather
# than as a boolean flag, and each description names its own option instead
# of repeating "Database name". Defaults quoted are the ones process uses.
#
# c - The command object; it only needs to respond to #option.
#
# Returns the result of the last c.option call.
def self.specify_options(c)
  c.option 'dbname', '--dbname DB', 'Database name'
  c.option 'user', '--user USER', 'Database user name'
  c.option 'password', '--password PW', 'Database user\'s password (default: "")'
  c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
end
28
+
29
# Loads the libraries the Enki importer relies on.
#
# Hands the names rubygems, sequel, fileutils, pg and yaml to
# BuntoImport.require_with_fallback and returns whatever that call returns.
def self.require_deps
  libraries = %w[rubygems sequel fileutils pg yaml]
  BuntoImport.require_with_fallback(libraries)
end
38
+
39
# Imports Enki posts, writing one "_posts/<YYYY-MM-DD>-<slug>.textile" file
# per row returned by SQL.
#
# Just working with postgres, but can be easily adapted
# to work with both mysql and postgres.
#
# options - Hash of importer options:
#           'dbname'   - database name (required).
#           'user'     - database user (required).
#           'password' - database password (default: "").
#           'host'     - database host (default: "localhost").
#
# Rows without a date are skipped with a warning on stderr, because the
# file name cannot be built without one; previously the first such row
# raised NoMethodError and stopped the import. A nil body is written as an
# empty body.
#
# Raises KeyError when 'dbname' or 'user' is missing.
def self.process(options)
  dbname = options.fetch('dbname')
  user   = options.fetch('user')
  pass   = options.fetch('password', "")
  host   = options.fetch('host', "localhost")

  FileUtils.mkdir_p('_posts')
  db = Sequel.postgres(:database => dbname,
                       :user => user,
                       :password => pass,
                       :host => host,
                       :encoding => 'utf8')

  db[SQL].each do |post|
    date = post[:date]
    if date.nil?
      warn "Skipping post #{post[:id]} (#{post[:title]}): no publication date."
      next
    end

    name = format("%.04d-%.02d-%.02d-%s.textile", date.year, date.month, date.day, post[:slug].strip)

    # Drop empty fields before serialising the front matter.
    front_matter = {
      'layout' => 'post',
      'title' => post[:title].to_s,
      'enki_id' => post[:id],
      'categories' => post[:tags]
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    File.open("_posts/#{name}", 'w') do |f|
      f.puts front_matter
      f.puts '---'
      # Strip carriage returns so the body uses "\n" line endings only.
      f.puts post[:body].to_s.delete("\r")
    end
  end
end
72
+ end
73
+ end
74
+ end