bunto-import 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,68 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Ghost < Importer
4
+
5
# Registers the command-line switch understood by the Ghost importer.
#
# c - the command object; only its `option` method is called.
#
# Returns whatever `c.option` returns.
def self.specify_options(c)
  dbfile_switch = ['dbfile', '--dbfile', 'Database file (default: ghost.db)']
  c.option(*dbfile_switch)
end
8
+
9
# Loads the libraries the Ghost importer relies on by handing their names
# to BuntoImport.require_with_fallback.
def self.require_deps
  libraries = ['rubygems', 'sequel', 'fileutils', 'safe_yaml']
  BuntoImport.require_with_fallback(libraries)
end
17
+
18
# Imports every post found in the Ghost database.
#
# options - Hash of importer options; 'dbfile' names the database file and
#           falls back to "ghost.db".
#
# When at least one post is found, the "_posts" and "_drafts" directories
# are created and each post is passed to write_post_to_file.
#
# Returns nil when there are no posts, otherwise the result of iterating
# over them.
def self.process(options)
  dbfile = options.fetch('dbfile', 'ghost.db')
  posts = fetch_posts(dbfile)
  return if posts.empty?

  FileUtils.mkdir_p("_posts")
  FileUtils.mkdir_p("_drafts")
  posts.each { |post| write_post_to_file(post) }
end
28
+
29
+ private
30
# Opens the SQLite database at `dbfile` through Sequel and returns the
# dataset for the post columns the importer needs.
#
# dbfile - path of the Ghost database file.
def self.fetch_posts(dbfile)
  columns = %w[title slug markdown created_at status].map { |name| "`#{name}`" }.join(', ')
  Sequel.sqlite(dbfile)["SELECT #{columns} FROM posts"]
end
35
+
36
# Converts one Ghost post row into a front-matter file.
#
# post - Hash-like row read with the keys :status, :created_at, :slug,
#        :title and :markdown.
#
# Drafts go to "_drafts" and carry no date in the front matter; everything
# else goes to "_posts". The file itself is written by write_file.
def self.write_post_to_file(post)
  is_draft = post[:status].eql?('draft')

  # The stored timestamp has three extra trailing digits (presumably
  # milliseconds); cutting them off gives a usable epoch value.
  epoch_seconds = post[:created_at].to_i.to_s[0..-4].to_i
  date = Time.at(epoch_seconds)

  target_dir = is_draft ? "_drafts" : "_posts"
  filename = File.join(target_dir, "#{date.strftime('%Y-%m-%d')}-#{post[:slug]}.markdown")

  frontmatter = { 'layout' => 'post', 'title' => post[:title] }
  frontmatter['date'] = date unless is_draft # only published posts get a date
  frontmatter.reject! { |_key, value| value.nil? || value == '' } # drop empty fields

  write_file(filename, frontmatter.to_yaml, post[:markdown])
end
58
+
59
# Writes a post to disk: the front matter, a "---" separator line, then the
# content.
#
# filename    - path of the file to create or overwrite.
# frontmatter - YAML header text.
# content     - post body.
def self.write_file(filename, frontmatter, content)
  File.open(filename, "w") { |out| out.puts frontmatter, "---", content }
end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,64 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class GoogleReader < Importer
4
# Aborts the program when the mandatory 'source' option is absent.
#
# options - Hash of importer options.
def self.validate(options)
  abort "Missing mandatory option --source." if options['source'].nil?
end
9
+
10
# Registers the command-line switch understood by the Google Reader
# importer.
#
# c - the command object; only its `option` method is called.
def self.specify_options(c)
  source_switch = ['source', '--source', 'Source XML file of Google Reader export']
  c.option(*source_switch)
end
13
+
14
# Loads the libraries the Google Reader importer relies on by handing their
# names to BuntoImport.require_with_fallback.
def self.require_deps
  libraries = ['rubygems', 'rss', 'fileutils', 'safe_yaml', 'open-uri', 'rexml/document', 'date']
  BuntoImport.require_with_fallback(libraries)
end
25
+
26
# Process the import.
#
# options - Hash of importer options; 'source' (required) is a URL or a
#           local file path String.
#
# Writes one "_posts/<date>-<slug>.html" file per feed item, holding a YAML
# header (layout and title) followed by the item's content.
#
# Raises KeyError when 'source' is missing and RuntimeError when the parser
# returns no feed.
#
# Returns nothing.
def self.process(options)
  source = options.fetch('source')

  # Kernel#open runs a leading "|" as a shell command and, on Ruby 3+, no
  # longer fetches URLs, so pick the opener explicitly: open-uri for
  # anything that looks like "scheme://", plain File.open otherwise.
  opener = source.to_s =~ %r{\A[a-z][a-z0-9+.-]*://}i ? URI.method(:open) : File.method(:open)

  opener.call(source) do |content|
    feed = RSS::Parser.parse(content)

    raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless feed

    # Created once, rather than once per item.
    FileUtils.mkdir_p("_posts")

    feed.items.each do |item|
      title = item.title.content.to_s
      formatted_date = Date.parse(item.published.to_s)

      # Split on separator characters. "$" sits inside a character class so
      # it is a literal dollar sign; as a bare alternative it was an
      # end-of-line anchor and dollar signs leaked into file names.
      post_name = title.split(%r{[ !/:&$,-]}).reject(&:empty?).map(&:downcase).join('-')
      name = "#{formatted_date}-#{post_name}"

      header = {
        'layout' => 'post',
        'title' => title
      }

      File.open("_posts/#{name}.html", "w") do |f|
        f.puts header.to_yaml
        f.puts "---\n\n"
        f.puts item.content.content.to_s
      end
    end
  end
end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,90 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Joomla < Importer
4
# Aborts the program at the first mandatory option that is absent.
#
# options - Hash of importer options; 'dbname' and 'user' are required.
def self.validate(options)
  mandatory = %w[dbname user]
  mandatory.each do |name|
    next unless options[name].nil?

    abort "Missing mandatory option --#{name}."
  end
end
11
+
12
# Registers the command-line switches understood by the Joomla importer.
#
# c - the command object; only its `option` method is called.
def self.specify_options(c)
  c.option 'dbname', '--dbname', 'Database name'
  c.option 'user', '--user', 'Database user name'
  c.option 'password', '--password', "Database user's password (default: '')"
  c.option 'host', '--host', 'Database host name'
  # This help text used to be a copy of the --prefix one; the option is the
  # value matched against `sectionid`, and process defaults it to '1'.
  c.option 'section', '--section', 'Section ID (default: 1)'
  c.option 'prefix', '--prefix', 'Table prefix name'
end
20
+
21
# Loads the libraries the Joomla importer relies on by handing their names
# to BuntoImport.require_with_fallback.
def self.require_deps
  libraries = ['rubygems', 'sequel', 'fileutils', 'safe_yaml']
  BuntoImport.require_with_fallback(libraries)
end
29
+
30
# Imports Joomla articles from a MySQL database into "_posts".
#
# options - Hash of importer options:
#           'dbname'   - database name (required)
#           'user'     - database user (required)
#           'password' - database password (default: '')
#           'host'     - database host (default: "localhost")
#           'section'  - value matched against `sectionid` (default: '1')
#           'prefix'   - table prefix (default: "jos_")
#
# Writes one "_posts/YYYY-MM-DD-<id>-<slug>.markdown" file per selected row.
#
# Raises KeyError when 'dbname' or 'user' is missing.
def self.process(options)
  dbname = options.fetch('dbname')
  user = options.fetch('user')
  pass = options.fetch('password', '')
  host = options.fetch('host', "localhost")
  section = options.fetch('section', '1')
  table_prefix = options.fetch('prefix', "jos_")

  db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

  FileUtils.mkdir_p("_posts")

  # Reads the <prefix>content table via Sequel and creates a post file for
  # each row whose state is '0' or '1' in the requested section. The section
  # is bound as a placeholder instead of being spliced into the SQL text; a
  # table name cannot be bound, so the prefix is still interpolated.
  query = "SELECT `title`, `alias`, CONCAT(`introtext`,`fulltext`) as content, `created`, `id` " \
          "FROM #{table_prefix}content WHERE (state = '0' OR state = '1') AND sectionid = ?"

  db[query, section].each do |post|
    # Get required fields and construct Bunto compatible name.
    title = post[:title]
    date = post[:created]
    content = post[:content]
    id = post[:id]

    # Construct a slug from the title if the alias field is empty.
    # sluggify removes illegal filename characters.
    slug_source = post[:alias].to_s.empty? ? title : post[:alias]
    slug = sluggify(slug_source.to_s)

    name = "%02d-%02d-%02d-%03d-%s.markdown" % [date.year, date.month, date.day, id, slug]

    # Get the relevant fields as a hash, delete empty fields and convert
    # to YAML for the header.
    data = {
      'layout' => 'post',
      'title' => title.to_s,
      'joomla_id' => id,
      'joomla_url' => post[:alias],
      'date' => date
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    # Write out the data and content to file
    File.open("_posts/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts content
    end
  end
end
83
+
84
# Borrowed from the Wordpress importer.
#
# Lower-cases the title, collapses every run of characters other than ASCII
# letters and digits into one separator, trims the ends and joins the
# remaining words with "-".
def self.sluggify(title)
  words = title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip
  words.tr(" ", "-")
end
88
+ end
89
+ end
90
+ end
@@ -0,0 +1,91 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Joomla3 < Importer
4
# Aborts the program at the first mandatory option that is absent.
#
# options - Hash of importer options; 'dbname', 'user' and 'prefix' are
#           required.
def self.validate(options)
  mandatory = %w[dbname user prefix]
  mandatory.each do |name|
    next unless options[name].nil?

    abort "Missing mandatory option --#{name}."
  end
end
11
+
12
# Registers the command-line switches understood by the Joomla 3 importer.
#
# c - the command object; only its `option` method is called.
#
# Returns whatever the last `c.option` call returns.
def self.specify_options(c)
  switches = [
    ['dbname', '--dbname', 'Database name'],
    ['user', '--user', 'Database user name'],
    ['password', '--password', "Database user's password (default: '')"],
    ['host', '--host', 'Database host name'],
    ['category', '--category', 'ID of the category'],
    ['prefix', '--prefix', 'Table prefix name']
  ]
  switches.inject(nil) { |_previous, switch| c.option(*switch) }
end
20
+
21
# Loads the libraries the Joomla 3 importer relies on by handing their
# names to BuntoImport.require_with_fallback.
def self.require_deps
  libraries = ['rubygems', 'sequel', 'fileutils', 'safe_yaml']
  BuntoImport.require_with_fallback(libraries)
end
29
+
30
# Imports Joomla 3 articles from a MySQL database into "_posts".
#
# options - Hash of importer options:
#           'dbname'   - database name (required)
#           'user'     - database user (required)
#           'password' - database password (default: '')
#           'host'     - database host (default: "localhost")
#           'category' - category ID, Integer or numeric String (default: 0,
#                        meaning every category except ID 2)
#           'prefix'   - table prefix (default: "jos_")
#
# Writes one "_posts/YYYY-MM-DD-<alias>.markdown" file per selected row.
#
# Raises KeyError when 'dbname' or 'user' is missing.
def self.process(options)
  dbname = options.fetch('dbname')
  user = options.fetch('user')
  pass = options.fetch('password', '')
  host = options.fetch('host', "localhost")
  # Coerced because a command-line value is a String and `"3" > 0` raises.
  cid = options.fetch('category', 0).to_i
  table_prefix = options.fetch('prefix', "jos_")

  db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

  FileUtils.mkdir_p("_posts")

  # Reads a MySQL database via Sequel and creates a post file for each
  # post in #__content that is published.
  query = "SELECT `cn`.`title`, `cn`.`alias`, `cn`.`introtext`, CONCAT(`cn`.`introtext`,`cn`.`fulltext`) AS `content`, " \
          "`cn`.`created`, `cn`.`id`, `ct`.`title` AS `category`, `u`.`name` AS `author` " \
          "FROM `#{table_prefix}content` AS `cn` JOIN `#{table_prefix}categories` AS `ct` ON `cn`.`catid` = `ct`.`id` " \
          "JOIN `#{table_prefix}users` AS `u` ON `cn`.`created_by` = `u`.`id` " \
          "WHERE (`cn`.`state` = '1' OR `cn`.`state` = '2') " # Only published and archived content items to be imported

  query += if cid > 0
             " AND `cn`.`catid` = '#{cid}' " # cid is an Integer at this point
           else
             " AND `cn`.`catid` != '2' " # Filter out uncategorized content
           end

  db[query].each do |post|
    # Get required fields and construct Bunto compatible name.
    title = post[:title]
    slug = post[:alias]
    date = post[:created]
    name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, slug]

    # Get the relevant fields as a hash, delete empty fields and convert
    # to YAML for the header. `to_s` comes before `strip` so a NULL
    # introtext becomes an empty (and therefore dropped) excerpt.
    data = {
      'layout' => 'post',
      'title' => title.to_s,
      'joomla_id' => post[:id],
      'joomla_url' => slug,
      'date' => date,
      'author' => post[:author],
      'excerpt' => post[:introtext].to_s.strip,
      'category' => post[:category]
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    # Write out the data and content to file
    File.open("_posts/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts post[:content]
    end
  end
end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,125 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Jrnl < Importer
4
+
5
# Loads the libraries the jrnl importer relies on by handing their names to
# BuntoImport.require_with_fallback.
def self.require_deps
  libraries = ['time', 'rubygems', 'safe_yaml']
  BuntoImport.require_with_fallback(libraries)
end
12
+
13
# Registers the command-line switches understood by the jrnl importer.
#
# c - the command object; only its `option` method is called.
#
# Returns whatever the last `c.option` call returns.
def self.specify_options(c)
  switches = [
    ['file', '--file FILENAME', 'Journal file (default: "~/journal.txt")'],
    ['time_format', '--time_format FORMAT', 'Time format of your journal (default: "%Y-%m-%d %H:%M")'],
    ['extension', '--extension EXT', 'Output extension (default: "md")'],
    ['layout', '--layout NAME', 'Output post layout (default: "post")']
  ]
  switches.inject(nil) { |_previous, switch| c.option(*switch) }
end
19
+
20
# Reads a jrnl file and creates a new post for each entry.
#
# options - Hash of importer options:
#           'file'        - path to the input file (default: "~/journal.txt")
#           'time_format' - format used by the jrnl configuration
#                           (default: "%Y-%m-%d %H:%M")
#           'extension'   - extension of the output files (default: "md")
#           'layout'      - layout of the output (default: "post")
#
# Entries are separated by a blank line. The first line of an entry holds
# the timestamp and title, the following lines the body. Blank entries,
# which appear when entries are separated by more than one blank line, are
# skipped instead of crashing on a missing dateline.
#
# Aborts the program when the journal file does not exist.
def self.process(options)
  file = options.fetch('file', "~/journal.txt")
  time_format = options.fetch('time_format', "%Y-%m-%d %H:%M")
  extension = options.fetch('extension', "md")
  layout = options.fetch('layout', "post")

  # Width of a rendered timestamp; used to cut the dateline into date and title.
  date_length = Time.now.strftime(time_format).length

  # convert relative to absolute if needed
  file = File.expand_path(file)

  abort "The jrnl file was not found. Please make sure '#{file}' exists. You can specify a different file using the --file switch." unless File.file?(file)

  File.read(file).split("\n\n").each do |entry|
    # Leading empty lines are left over from extra blank separators.
    content = entry.split("\n").drop_while { |line| line.strip.empty? }
    next if content.empty?

    # content[0] has the date and title, the rest is the post body
    dateline = content[0]

    body = get_post_content(content)
    date = get_date(dateline, date_length)
    title = get_title(dateline, date_length)
    slug = create_slug(title)
    filename = create_filename(date, slug, extension)
    meta = create_meta(layout, title, date) # prepare YAML meta data

    write_file(filename, meta, body) # write to file
  end
end
58
+
59
# Extracts the body from a jrnl entry.
#
# content - Array of the entry's lines; the first one is the dateline.
#
# Every line after the dateline belongs to the body. Returning only the
# second line silently dropped the rest of a multi-line entry.
#
# Returns the body lines joined with newlines, or nil when the entry has no
# body.
def self.get_post_content(content)
  body_lines = content.drop(1)
  body_lines.empty? ? nil : body_lines.join("\n")
end
63
+
64
# Cuts the timestamp off the front of the dateline.
#
# content - the dateline String.
# offset  - number of leading characters that make up the timestamp.
#
# Returns the first `offset` characters of `content`.
def self.get_date(content, offset)
  content.slice(0, offset)
end
68
+
69
# Cuts the title off the end of the dateline.
#
# content - the dateline String.
# offset  - number of leading characters that make up the timestamp; the
#           character right after it (the separator) is skipped too.
#
# Returns the remainder of the line, or nil when the line is shorter than
# the timestamp plus separator.
def self.get_title(content, offset)
  title_start = offset + 1
  content.slice(title_start, content.length)
end
73
+
74
# Builds a slug from a title: lower-cased, trimmed, spaces turned into
# hyphens and every character that is neither a word character nor a
# hyphen removed.
def self.create_slug(title)
  hyphenated = title.downcase.strip.tr(' ', '-')
  hyphenated.gsub(/[^\w-]/, '')
end
78
+
79
# Builds the output file name "<YYYY-MM-DD>-<slug>.<extension>".
#
# date      - timestamp String, parsed with Time.parse.
# slug      - slug of the entry title.
# extension - file extension without the leading dot.
def self.create_filename(date, slug, extension)
  day = Time.parse(date).strftime("%Y-%m-%d")
  "#{day}-#{slug}.#{extension}"
end
83
+
84
# Prepare YAML meta data
#
# layout - name of the layout
# title  - title of the entry
# date   - date of entry creation, as a String understood by Time.parse
#
# The date is re-rendered as "%Y-%m-%d %H:%M %z", so the stored value
# carries the zone offset Time.parse assigned to it.
#
# Returns the layout, title and date as a YAML document String
def self.create_meta(layout, title, date)
  stamp = Time.parse(date).strftime("%Y-%m-%d %H:%M %z")
  front_matter = { 'layout' => layout, 'title' => title, 'date' => stamp }
  front_matter.to_yaml
end
104
+
105
# Writes given data to file
#
# filename - name of the output file
# meta     - YAML header data
# body     - jrnl entry content
#
# Examples
#
#   write_file("2013-01-01-entry-1.md", "---\nlayout: post\ntitle: Entry 1\n", "This is the first entry for my new journal")
#
# Creates the "_posts" directory when it is missing; previously the write
# failed with Errno::ENOENT in a directory without one.
#
# Writes file to _posts/filename
def self.write_file(filename, meta, body)
  Dir.mkdir("_posts") unless File.directory?("_posts")

  File.open("_posts/#{filename}", "w") do |f|
    f.puts meta
    f.puts "---\n\n"
    f.puts body
  end
end
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,72 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Marley < Importer
4
# Checks the 'marley_data_dir' option.
#
# options - Hash of importer options.
#
# Reports a missing option through Bunto.logger.abort_with.
#
# Raises ArgumentError when the option is set but is not a directory.
def self.validate(options)
  data_dir = options['marley_data_dir']

  if data_dir.nil?
    Bunto.logger.abort_with "Missing mandatory option --marley_data_dir."
  elsif !File.directory?(data_dir)
    raise ArgumentError, "marley dir '#{data_dir}' not found"
  end
end
13
+
14
+ def self.regexp
15
+ { :id => /^\d{0,4}-{0,1}(.*)$/,
16
+ :title => /^#\s*(.*)\s+$/,
17
+ :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
18
+ :published_on => /.*\s+\(([0-9\/]+)\)$/,
19
+ :perex => /^([^\#\n]+\n)$/,
20
+ :meta => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
21
+ }
22
+ end
23
+
24
# Loads the libraries the Marley importer relies on by handing their names
# to BuntoImport.require_with_fallback.
def self.require_deps
  libraries = ['fileutils', 'safe_yaml']
  BuntoImport.require_with_fallback(libraries)
end
30
+
31
# Registers the command-line switch understood by the Marley importer.
#
# c - the command object; only its `option` method is called.
def self.specify_options(c)
  data_dir_switch = ['marley_data_dir', '--marley_data_dir DIR', 'The dir containing your marley data']
  c.option(*data_dir_switch)
end
34
+
35
# Imports every "*.txt" post below the Marley data directory into "_posts".
#
# options - Hash of importer options; 'marley_data_dir' (required) is the
#           directory holding the Marley data.
#
# For each file the "{{ ... }}" meta block is loaded as YAML, the title and
# perex are taken from the text, and the publication date comes from a
# "(YYYY/MM/DD)" suffix on the title line, falling back to the modification
# time of the post's directory.
#
# Raises KeyError when 'marley_data_dir' is missing.
#
# Returns a String reporting how many posts were created.
def self.process(options)
  require 'date' # Date.parse is needed below and require_deps does not load it

  marley_data_dir = options.fetch('marley_data_dir')
  patterns = regexp

  FileUtils.mkdir_p "_posts"

  posts = 0
  Dir["#{marley_data_dir}/**/*.txt"].each do |path|
    # File.exists? was removed in Ruby 3.2.
    next unless File.exist?(path)

    # copied over from marley's app/lib/post.rb
    file_content = File.read(path)
    meta_content = file_content.slice!(patterns[:meta])
    body = file_content.sub(patterns[:title], '').sub(patterns[:perex], '').strip

    # String#[] with a capture index returns the captured text itself;
    # `scan(...).first.to_s` produced an Array inspect string like '["Title"]'.
    title = file_content[patterns[:title], 1].to_s.strip
    perex = file_content[patterns[:perex], 1].to_s.strip

    # The date used to be read from an undefined `post` variable, so the
    # blanket rescue always fell back to the directory mtime.
    post_dir = File.dirname(path)
    raw_date = file_content[patterns[:title_with_date], 2]
    published_on = begin
      raw_date ? Date.parse(raw_date) : File.mtime(post_dir)
    rescue ArgumentError
      File.mtime(post_dir)
    end

    # NOTE(review): YAML.load is kept from the original; confirm the data
    # directory is trusted (safe_yaml is loaded by require_deps).
    meta_yaml = meta_content && meta_content[patterns[:meta], 1]
    meta = meta_yaml ? YAML.load(meta_yaml) : nil
    meta = {} unless meta.is_a?(Hash)
    meta['title'] = title
    meta['layout'] = 'post'

    formatted_date = published_on.strftime('%Y-%m-%d')
    post_name = post_dir.split(%r{/}).last.gsub(/\A\d+-/, '')

    name = "#{formatted_date}-#{post_name}"
    File.open("_posts/#{name}.markdown", "w") do |out|
      out.puts meta.to_yaml
      out.puts "---\n"
      out.puts "\n#{perex}\n\n" unless perex.empty?
      out.puts body
    end
    posts += 1
  end
  "Created #{posts} posts!"
end
70
+ end
71
+ end
72
+ end