bunto-import 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,96 @@
1
+ # encoding: UTF-8
2
+
3
module BuntoImport
  module Importers
    # Imports posts from a CSV file, writing one file into "_posts" per row.
    class CSV < Importer
      # Loads the libraries this importer uses. "date" is listed explicitly
      # because CSVPost#published_at relies on DateTime.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          csv
          date
          fileutils
          yaml
        ])
      end

      # Registers the command-line options understood by this importer.
      #
      # c - the command object options are declared on.
      def self.specify_options(c)
        c.option 'file', '--file NAME', 'The CSV file to import (default: "posts.csv")'
        c.option 'no-front-matter', '--no-front-matter', 'Do not add the default front matter to the post body'
      end

      # Reads a csv with title, permalink, body, published_at, and filter.
      # It creates a post file for each row in the csv
      #
      # options - Hash of options; reads 'file' (default "posts.csv") and
      #           passes the whole hash on to write_post.
      #
      # Aborts when the CSV file does not exist. Raises
      # CSVPost::MissingDataError when a row lacks a mandatory column.
      def self.process(options)
        file = options.fetch('file', "posts.csv")

        # Check the input first so that a failed run does not leave an empty
        # "_posts" directory behind.
        abort "Cannot find the file '#{file}'. Aborting." unless File.file?(file)

        FileUtils.mkdir_p "_posts"
        posts = 0

        ::CSV.foreach(file) do |row|
          next if row.all?(&:nil?)       # blank line: carries no post data
          next if row[0] == "title"      # header

          write_post(CSVPost.new(row), options)
          posts += 1
        end
        Bunto.logger.info "Created #{posts} posts!"
      end

      # Value object wrapping one CSV row.
      class CSVPost
        attr_reader :title, :permalink, :body, :markup

        MissingDataError = Class.new(RuntimeError)

        # Creates a CSVPost
        #
        # row - Array of data, length of 4 or 5 with the columns:
        #
        #   1. title
        #   2. permalink
        #   3. body
        #   4. published_at
        #   5. markup (markdown, textile)
        #
        # Raises MissingDataError when any of the first four columns is nil.
        def initialize(row)
          @title        = row[0] || missing_data("Post title not present in first column.")
          @permalink    = row[1] || missing_data("Post permalink not present in second column.")
          @body         = row[2] || missing_data("Post body not present in third column.")
          @published_at = row[3] || missing_data("Post publish date not present in fourth column.")
          @markup       = row[4] || "markdown"
        end

        # Returns the publish date as a DateTime, parsing the raw column value
        # on first access and caching the parsed result.
        def published_at
          if @published_at && !@published_at.is_a?(DateTime)
            @published_at = DateTime.parse(@published_at)
          end
          @published_at
        end

        # Returns the post file name: "<YYYY-MM-DD>-<permalink basename>.<markup>".
        def filename
          "#{published_at.strftime("%Y-%m-%d")}-#{File.basename(permalink, ".*")}.#{markup}"
        end

        # Raises MissingDataError with the given message.
        def missing_data(message)
          raise MissingDataError, message
        end
      end

      # Writes one post (optional front matter followed by the body) into
      # "_posts/<post.filename>".
      #
      # post    - the CSVPost to write.
      # options - Hash of options, forwarded to write_frontmatter.
      def self.write_post(post, options = {})
        File.open(File.join("_posts", post.filename), "w") do |f|
          write_frontmatter(f, post, options)
          f.puts post.body
        end
      end

      # Writes the YAML front matter for post to f, unless the
      # 'no-front-matter' option is set.
      #
      # f       - the open, writable IO.
      # post    - the CSVPost supplying title, date and permalink.
      # options - Hash of options; reads 'no-front-matter' (default false).
      def self.write_frontmatter(f, post, options)
        return if options.fetch('no-front-matter', false)

        f.puts YAML.dump({
          "layout"    => "post",
          "title"     => post.title,
          "date"      => post.published_at.to_s,
          "permalink" => post.permalink
        })
        f.puts "---"
      end
    end
  end
end
@@ -0,0 +1,139 @@
1
module BuntoImport
  module Importers
    # Imports nodes from a Drupal 6 MySQL database as Bunto posts and drafts.
    class Drupal6 < Importer
      # Reads a MySQL database via Sequel and creates a post file for each story
      # and blog node.
      #
      # This is a template: build_query substitutes "(%types%)" and the bare
      # table names on a copy, so the constant itself is never modified.
      QUERY = "SELECT n.nid, \
                      n.title, \
                      nr.body, \
                      n.created, \
                      n.status, \
                      GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags' \
                 FROM node_revisions AS nr, \
                      node AS n \
                      LEFT OUTER JOIN term_node AS tn ON tn.nid = n.nid \
                      LEFT OUTER JOIN term_data AS td ON tn.tid = td.tid \
                WHERE (%types%) \
                  AND n.vid = nr.vid \
             GROUP BY n.nid".freeze

      # Aborts unless the mandatory 'dbname' and 'user' options are present.
      def self.validate(options)
        %w[dbname user].each do |option|
          if options[option].nil?
            abort "Missing mandatory option --#{option}."
          end
        end
      end

      # Registers the command-line options understood by this importer.
      def self.specify_options(c)
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', "Database user's password (default: '')"
        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
        c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
        c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
      end

      # Loads the libraries this importer uses.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          sequel
          fileutils
          safe_yaml
          mysql
        ])
      end

      # Imports every node of the requested types.
      #
      # options - Hash of options: 'dbname' and 'user' are required;
      #           'password', 'host', 'prefix' and 'types' are optional.
      #
      # Published nodes (status 1) go to "_posts", everything else to
      # "_drafts". For published nodes a redirect page is written for
      # "node/<nid>" and for each row found in the url_alias table.
      def self.process(options)
        dbname = options.fetch('dbname')
        user   = options.fetch('user')
        pass   = options.fetch('password', "")
        host   = options.fetch('host', "localhost")
        prefix = options.fetch('prefix', "")
        types  = Array(options.fetch('types', ['blog', 'story', 'article']))
        # An empty list keeps its former meaning of matching the empty type.
        types  = [''] if types.empty?

        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

        query = build_query(prefix, types.length)

        FileUtils.mkdir_p "_posts"
        FileUtils.mkdir_p "_drafts"
        FileUtils.mkdir_p "_layouts"

        write_refresh_layout

        # The type names are bound to the "?" placeholders by Sequel.
        db[query, *types].each do |post|
          # Get required fields and construct Bunto compatible name
          node_id = post[:nid]
          title = post[:title]
          content = post[:body]
          tags = (post[:tags] || '').downcase.strip
          created = post[:created]
          time = Time.at(created)
          is_published = post[:status] == 1
          dir = is_published ? "_posts" : "_drafts"
          slug = slugify(title)
          name = time.strftime("%Y-%m-%d-") + slug + '.md'

          # Get the relevant fields as a hash, delete empty fields and convert
          # to YAML for the header
          front_matter = {
            'layout'     => 'post',
            'title'      => title.to_s,
            'created'    => created,
            'categories' => tags.split('|')
          }.delete_if { |_key, value| value.nil? || value == '' }
          front_matter.each_value do |value|
            value.force_encoding("UTF-8") if value.is_a?(String)
          end
          data = front_matter.to_yaml

          # Write out the data and content to file
          File.open("#{dir}/#{name}", "w") do |f|
            f.puts data
            f.puts "---"
            f.puts content
          end

          # Make a file to redirect from the old Drupal URL
          if is_published
            aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all

            aliases.push(:dst => "node/#{node_id}")

            aliases.each do |url_alias|
              FileUtils.mkdir_p url_alias[:dst]
              File.open("#{url_alias[:dst]}/index.md", "w") do |f|
                f.puts "---"
                f.puts "layout: refresh"
                f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
                f.puts "---"
              end
            end
          end
        end

        # TODO: Make dirs & files for nodes of type 'page'
        # Make refresh pages for these as well
      end

      # Returns a copy of QUERY with table names prefixed and "(%types%)"
      # expanded into type_count "n.type = ?" conditions joined by OR.
      #
      # prefix     - String table prefix; "" leaves the table names alone.
      # type_count - Integer number of content types that will be bound.
      def self.build_query(prefix, type_count)
        query = QUERY.dup

        unless prefix.empty?
          %w[node node_revisions term_node term_data].each do |table|
            query[" #{table} "] = " #{prefix}#{table} "
          end
        end

        conditions = Array.new(type_count, "n.type = ?").join(" OR ")
        query[" WHERE (%types%) "] = " WHERE (#{conditions}) "
        query
      end

      # Create the refresh layout
      # Change the refresh url if you customized your permalink config
      def self.write_refresh_layout
        File.open("_layouts/refresh.html", "w") do |f|
          f.puts(
            '<!DOCTYPE html>',
            '<html>',
            '<head>',
            '<meta http-equiv="content-type" content="text/html; charset=utf-8" />',
            '<meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />',
            '</head>',
            '</html>'
          )
        end
      end

      # Turns a node title into a file-name slug: lower-cased, "&" spelled
      # out as "and", whitespace/dots/slashes turned into dashes, any other
      # non-word character dropped, and dash/underscore runs collapsed and
      # trimmed from both ends. A nil title yields "".
      def self.slugify(title)
        title.to_s.strip.downcase
             .gsub(/(&|&amp;)/, ' and ')
             .gsub(/[\s\.\/\\]/, '-')
             .gsub(/[^\w-]/, '')
             .gsub(/[-_]{2,}/, '-')
             .gsub(/^[-_]/, '')
             .gsub(/[-_]$/, '')
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
module BuntoImport
  module Importers
    # Imports nodes from a Drupal 7 MySQL database as Bunto posts and drafts.
    class Drupal7 < Importer
      # Reads a MySQL database via Sequel and creates a post file for each story
      # and blog node.
      #
      # This is a template: build_query substitutes "(%types%)" and the bare
      # table names on a copy, so the constant itself is never modified.
      QUERY = "SELECT n.title, \
                      fdb.body_value, \
                      fdb.body_summary, \
                      n.created, \
                      n.status, \
                      n.nid, \
                      u.name \
               FROM node AS n, \
                    field_data_body AS fdb, \
                    users AS u \
               WHERE (%types%) \
               AND n.nid = fdb.entity_id \
               AND n.vid = fdb.revision_id \
               AND n.uid = u.uid".freeze

      # Aborts unless the mandatory 'dbname' and 'user' options are present.
      def self.validate(options)
        %w[dbname user].each do |option|
          if options[option].nil?
            abort "Missing mandatory option --#{option}."
          end
        end
      end

      # Registers the command-line options understood by this importer.
      def self.specify_options(c)
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', 'Database user\'s password (default: "")'
        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
        c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
        c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
      end

      # Loads the libraries this importer uses.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          sequel
          fileutils
          safe_yaml
        ])
      end

      # Imports every node of the requested types.
      #
      # options - Hash of options: 'dbname' and 'user' are required;
      #           'password', 'host', 'prefix' and 'types' are optional.
      #
      # Published nodes (status 1) go to "_posts", everything else to
      # "_drafts".
      def self.process(options)
        dbname = options.fetch('dbname')
        user   = options.fetch('user')
        pass   = options.fetch('password', "")
        host   = options.fetch('host', "localhost")
        prefix = options.fetch('prefix', "")
        types  = Array(options.fetch('types', ['blog', 'story', 'article']))
        # An empty list keeps its former meaning of matching the empty type.
        types  = [''] if types.empty?

        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

        query = build_query(prefix, types.length)

        FileUtils.mkdir_p "_posts"
        FileUtils.mkdir_p "_drafts"
        # Nothing below writes into "_layouts"; it is still created, as before.
        FileUtils.mkdir_p "_layouts"

        # The type names are bound to the "?" placeholders by Sequel.
        db[query, *types].each do |post|
          # Get required fields and construct Bunto compatible name
          title = post[:title]
          content = post[:body_value]
          summary = post[:body_summary]
          created = post[:created]
          author = post[:name]
          nid = post[:nid]
          time = Time.at(created)
          is_published = post[:status] == 1
          dir = is_published ? "_posts" : "_drafts"
          slug = slugify(title)
          name = time.strftime("%Y-%m-%d-") + slug + '.md'

          # Get the relevant fields as a hash, delete empty fields and convert
          # to YAML for the header
          data = {
            'layout'  => 'post',
            'title'   => title.strip.force_encoding("UTF-8"),
            'author'  => author,
            'nid'     => nid,
            'created' => created,
            'excerpt' => summary
          }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

          # Write out the data and content to file
          File.open("#{dir}/#{name}", "w") do |f|
            f.puts data
            f.puts "---"
            f.puts content
          end
        end

        # TODO: Make dirs & files for nodes of type 'page'
        # Make refresh pages for these as well

        # TODO: Make refresh dirs & files according to entries in url_alias table
      end

      # Returns a copy of QUERY with table names prefixed and "(%types%)"
      # expanded into type_count "n.type = ?" conditions joined by OR.
      #
      # prefix     - String table prefix; "" leaves the table names alone.
      # type_count - Integer number of content types that will be bound.
      def self.build_query(prefix, type_count)
        query = QUERY.dup

        unless prefix.empty?
          %w[node field_data_body users].each do |table|
            query[" #{table} "] = " #{prefix}#{table} "
          end
        end

        conditions = Array.new(type_count, "n.type = ?").join(" OR ")
        query[" WHERE (%types%) "] = " WHERE (#{conditions}) "
        query
      end

      # Turns a node title into a file-name slug: lower-cased, "&" spelled
      # out as "and", whitespace/dots/slashes turned into dashes, any other
      # non-word character dropped, and dash/underscore runs collapsed and
      # trimmed from both ends.
      def self.slugify(title)
        title.strip.downcase
             .gsub(/(&|&amp;)/, ' and ')
             .gsub(/[\s\.\/\\]/, '-')
             .gsub(/[^\w-]/, '')
             .gsub(/[-_]{2,}/, '-')
             .gsub(/^[-_]/, '')
             .gsub(/[-_]$/, '')
      end
    end
  end
end
@@ -0,0 +1,96 @@
1
module BuntoImport
  module Importers
    # Imports posts from the EasyBlog tables of a MySQL database.
    class Easyblog < Importer
      # Aborts unless the mandatory 'dbname' and 'user' options are present.
      def self.validate(options)
        %w[dbname user].each do |option|
          if options[option].nil?
            abort "Missing mandatory option --#{option}."
          end
        end
      end

      # Registers the command-line options understood by this importer.
      #
      # Every switch names its argument (e.g. "--dbname DB"); a switch declared
      # without one is parsed as a boolean flag and would not capture a value.
      def self.specify_options(c)
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', "Database user's password (default: '')"
        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
        # Still accepted so existing command lines keep working; process does
        # not read it.
        c.option 'section', '--section SECTION', 'Section ID (currently unused)'
        c.option 'prefix', '--prefix PREFIX', 'Table prefix name (default: "jos_")'
      end

      # Loads the libraries this importer uses.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          sequel
          fileutils
          safe_yaml
        ])
      end

      # Writes one "_posts/<date>-<alias>.markdown" file per EasyBlog post.
      #
      # options - Hash of options: 'dbname' and 'user' are required;
      #           'password' (default ''), 'host' (default "localhost") and
      #           'prefix' (default "jos_") are optional.
      def self.process(options)
        dbname = options.fetch('dbname')
        user   = options.fetch('user')
        pass   = options.fetch('password', '')
        host   = options.fetch('host', "localhost")
        table_prefix = options.fetch('prefix', "jos_")

        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

        FileUtils.mkdir_p("_posts")

        # Reads a MySQL database via Sequel and creates a post file for each
        # row of <prefix>easyblog_post, joined with its category title and a
        # space-separated list of its tag aliases. The query applies no
        # filter on publication state.
        query = "
          select
            ep.`title`, `permalink` as alias, concat(`intro`, `content`) as content, ep.`created`, ep.`id`, ec.`title` as category, tags
          from
            #{table_prefix}easyblog_post ep
            left join #{table_prefix}easyblog_category ec on (ep.category_id = ec.id)
            left join (
              select
                ept.post_id,
                group_concat(et.alias order by alias separator ' ') as tags
              from
                #{table_prefix}easyblog_post_tag ept
                join #{table_prefix}easyblog_tag et on (ept.tag_id = et.id)
              group by
                ept.post_id) x on (ep.id = x.post_id);
        "

        db[query].each do |post|
          # Get required fields and construct Bunto compatible name.
          title = post[:title]
          slug = post[:alias]
          date = post[:created]
          content = post[:content]
          name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
                                                 slug]

          # Get the relevant fields as a hash, delete empty fields and convert
          # to YAML for the header.
          data = {
            'layout'     => 'post',
            'title'      => title.to_s,
            'joomla_id'  => post[:id],
            'joomla_url' => slug,
            'category'   => post[:category],
            'tags'       => post[:tags],
            'date'       => date
          }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

          # Write out the data and content to file
          File.open("_posts/#{name}", "w") do |f|
            f.puts data
            f.puts "---"
            f.puts content
          end
        end
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
module BuntoImport
  module Importers
    # Imports posts from an Enki blog stored in a PostgreSQL database.
    class Enki < Importer
      # Selects the columns needed to build one post file per row of "posts".
      SQL = <<-EOS.freeze
        SELECT p.id,
               p.title,
               p.slug,
               p.body,
               p.published_at as date,
               p.cached_tag_list as tags
        FROM posts p
      EOS

      # Aborts unless the mandatory 'dbname' and 'user' options are present.
      def self.validate(options)
        %w[dbname user].each do |option|
          if options[option].nil?
            abort "Missing mandatory option --#{option}."
          end
        end
      end

      # Registers the command-line options understood by this importer.
      #
      # Every switch names its argument (e.g. "--dbname DB"); a switch declared
      # without one is parsed as a boolean flag and would not capture a value.
      def self.specify_options(c)
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', 'Database user\'s password (default: "")'
        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
      end

      # Loads the libraries this importer uses.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          sequel
          fileutils
          pg
          yaml
        ])
      end

      # Just working with postgres, but can be easily adapted
      # to work with both mysql and postgres.
      #
      # options - Hash of options: 'dbname' and 'user' are required;
      #           'password' (default "") and 'host' (default "localhost")
      #           are optional.
      #
      # Writes one "_posts/<YYYY>-<MM>-<DD>-<slug>.textile" file per row
      # returned by SQL.
      def self.process(options)
        dbname = options.fetch('dbname')
        user   = options.fetch('user')
        pass   = options.fetch('password', "")
        host   = options.fetch('host', "localhost")

        FileUtils.mkdir_p('_posts')
        db = Sequel.postgres(:database => dbname,
                             :user => user,
                             :password => pass,
                             :host => host,
                             :encoding => 'utf8')

        db[SQL].each do |post|
          date = post[:date]
          name = format("%04d-%02d-%02d-%s.textile",
                        date.year, date.month, date.day, post[:slug].strip)

          front_matter = {
            'layout'     => 'post',
            'title'      => post[:title].to_s,
            'enki_id'    => post[:id],
            'categories' => post[:tags]
          }.delete_if { |_key, value| value.nil? || value == '' }

          File.open("_posts/#{name}", 'w') do |f|
            f.puts front_matter.to_yaml
            f.puts '---'
            # Carriage returns are removed from the stored body.
            f.puts post[:body].delete("\r")
          end
        end
      end
    end
  end
end