bunto-import 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,68 @@
1
module BuntoImport
  module Importers
    # Importer that turns the posts stored in a Ghost SQLite database into
    # post files under _posts (published posts) and _drafts (drafts).
    class Ghost < Importer
      # Declares the command-line options of this importer.
      #
      # c - the command object the options are registered on.
      #
      # Returns whatever c.option returns.
      def self.specify_options(c)
        # The FILE placeholder declares that the switch carries a value
        # (OptionParser-style switch spec assumed - confirm against the
        # command library in use).
        c.option 'dbfile', '--dbfile FILE', 'Database file (default: ghost.db)'
      end

      # Loads the libraries this importer depends on.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          sequel
          fileutils
          safe_yaml
        ])
      end

      # Process the import.
      #
      # options - Hash of options; 'dbfile' is the path of the Ghost SQLite
      #           database (default: 'ghost.db').
      #
      # Returns nil when the database holds no posts.
      def self.process(options)
        posts = fetch_posts(options.fetch('dbfile', 'ghost.db'))
        return if posts.empty?

        FileUtils.mkdir_p("_posts")
        FileUtils.mkdir_p("_drafts")
        posts.each { |post| write_post_to_file(post) }
      end

      # Internal helpers. They stay publicly callable, exactly as before: the
      # bare `private` that used to precede them has no effect on methods
      # defined with `def self.`, so it was dropped rather than left to mislead.

      # Opens the SQLite database and selects the columns needed for a post.
      #
      # dbfile - path of the SQLite database file.
      #
      # Returns the Sequel dataset for the posts query.
      def self.fetch_posts(dbfile)
        db = Sequel.sqlite(dbfile)
        db["SELECT `title`, `slug`, `markdown`, `created_at`, `status` FROM posts"]
      end

      # Writes a single post row to _posts or _drafts.
      #
      # post - a row with :title, :slug, :markdown, :created_at and :status.
      def self.write_post_to_file(post)
        draft = post[:status].eql?('draft')

        # created_at carries three extra trailing digits (presumably
        # milliseconds); dropping them yields the Unix timestamp in seconds.
        date = Time.at(post[:created_at].to_i / 1000)

        directory = draft ? "_drafts" : "_posts"
        filename = File.join(directory, "#{date.strftime('%Y-%m-%d')}-#{post[:slug]}.markdown")

        frontmatter = { 'layout' => 'post', 'title' => post[:title] }
        # Only published posts get a date in their front matter.
        frontmatter['date'] = date unless draft
        frontmatter.delete_if { |_key, value| value.nil? || value == '' }

        write_file(filename, frontmatter.to_yaml, post[:markdown])
      end

      # Writes the front matter, the closing "---" marker and the content.
      #
      # filename    - path of the file to create (overwritten if present).
      # frontmatter - the YAML front matter as a String.
      # content     - the post body.
      def self.write_file(filename, frontmatter, content)
        File.open(filename, "w") do |f|
          f.puts frontmatter
          f.puts "---"
          f.puts content
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
module BuntoImport
  module Importers
    # Importer that converts the items of a Google Reader export feed into
    # HTML post files under _posts.
    class GoogleReader < Importer
      # Aborts unless the mandatory 'source' option was given.
      #
      # options - Hash of options.
      def self.validate(options)
        abort "Missing mandatory option --source." if options['source'].nil?
      end

      # Declares the command-line options of this importer.
      #
      # c - the command object the options are registered on.
      def self.specify_options(c)
        # The SOURCE placeholder declares that the switch carries a value
        # (OptionParser-style switch spec assumed - confirm against the
        # command library in use).
        c.option 'source', '--source SOURCE', 'Source XML file of Google Reader export'
      end

      # Loads the libraries this importer depends on.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          rss
          fileutils
          safe_yaml
          open-uri
          rexml/document
          date
        ])
      end

      # Process the import.
      #
      # options - Hash of options; 'source' is a URL or a local file String.
      #
      # Raises RuntimeError when the source cannot be parsed as a feed.
      def self.process(options)
        source = options.fetch('source')

        # Since Ruby 3.0 a bare Kernel#open no longer follows URLs even with
        # open-uri loaded; URI.open does, and still handles local paths.
        # Older Rubies lack a public URI.open, so fall back to Kernel.open.
        opener = defined?(URI) && URI.respond_to?(:open) ? URI : Kernel

        opener.open(source) do |content|
          feed = RSS::Parser.parse(content)

          raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless feed

          FileUtils.mkdir_p("_posts")

          feed.items.each do |item|
            title = item.title.content.to_s
            formatted_date = Date.parse(item.published.to_s)

            # NOTE(review): the `$` alternative is an end-of-line anchor, not
            # a literal dollar sign; `\$` may have been intended. Left as is
            # so generated file names do not change.
            words = title.split(%r{ |!|/|:|&|-|$|,}).reject { |word| word == '' }
            post_name = words.map(&:downcase).join('-')
            name = "#{formatted_date}-#{post_name}"

            header = {
              'layout' => 'post',
              'title'  => title
            }

            File.open("_posts/#{name}.html", "w") do |f|
              f.puts header.to_yaml
              f.puts "---\n\n"
              f.puts item.content.content.to_s
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
module BuntoImport
  module Importers
    # Importer that reads content items from a Joomla MySQL database and
    # writes one Markdown post per item under _posts.
    class Joomla < Importer
      # Aborts unless every mandatory option was given.
      #
      # options - Hash of options; 'dbname' and 'user' are required.
      def self.validate(options)
        %w[dbname user].each do |option|
          abort "Missing mandatory option --#{option}." if options[option].nil?
        end
      end

      # Declares the command-line options of this importer.
      #
      # c - the command object the options are registered on.
      def self.specify_options(c)
        # The upper-case placeholders declare that each switch carries a value
        # (OptionParser-style switch spec assumed - confirm against the
        # command library in use).
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', "Database user's password (default: '')"
        c.option 'host', '--host HOST', 'Database host name'
        c.option 'section', '--section ID', 'Section ID (default: 1)'
        c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
      end

      # Loads the libraries this importer depends on.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          sequel
          fileutils
          safe_yaml
        ])
      end

      # Process the import.
      #
      # options - Hash of options: 'dbname' and 'user' are required;
      #           'password' (default ''), 'host' (default 'localhost'),
      #           'section' (default '1') and 'prefix' (default 'jos_')
      #           are optional.
      def self.process(options)
        dbname       = options.fetch('dbname')
        user         = options.fetch('user')
        pass         = options.fetch('password', '')
        host         = options.fetch('host', "localhost")
        section      = options.fetch('section', '1')
        table_prefix = options.fetch('prefix', "jos_")

        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

        FileUtils.mkdir_p("_posts")

        # Selects every row of <prefix>content in the requested section whose
        # state is 0 or 1. The section id is bound through a placeholder
        # instead of being interpolated into the SQL text; the table prefix
        # is an identifier and cannot be bound, so it is still interpolated.
        query = "SELECT `title`, `alias`, CONCAT(`introtext`,`fulltext`) as content, `created`, `id` " \
                "FROM #{table_prefix}content WHERE (state = '0' OR state = '1') AND sectionid = ?"

        db[query, section].each do |post|
          title   = post[:title]
          date    = post[:created]
          content = post[:content]
          id      = post[:id]

          # Build the slug from the alias, or from the title when the alias
          # is missing or empty; sluggify strips illegal filename characters.
          slug_source = post[:alias].nil? || post[:alias].empty? ? post[:title] : post[:alias]
          slug = sluggify(slug_source)

          name = format("%02d-%02d-%02d-%03d-%s.markdown", date.year, date.month, date.day, id, slug)

          # Front matter: drop empty fields, then serialise to YAML.
          data = {
            'layout'     => 'post',
            'title'      => title.to_s,
            'joomla_id'  => post[:id],
            'joomla_url' => post[:alias],
            'date'       => date
          }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

          File.open("_posts/#{name}", "w") do |f|
            f.puts data
            f.puts "---"
            f.puts content
          end
        end
      end

      # Borrowed from the Wordpress importer.
      #
      # title - the String to turn into a slug.
      #
      # Returns the lower-cased title with every run of non-alphanumeric
      # characters collapsed into a single dash.
      def self.sluggify(title)
        title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
module BuntoImport
  module Importers
    # Importer that reads published and archived content items from a
    # Joomla 3 MySQL database and writes one Markdown post per item under
    # _posts.
    class Joomla3 < Importer
      # Aborts unless every mandatory option was given.
      #
      # options - Hash of options; 'dbname', 'user' and 'prefix' are required.
      def self.validate(options)
        %w[dbname user prefix].each do |option|
          abort "Missing mandatory option --#{option}." if options[option].nil?
        end
      end

      # Declares the command-line options of this importer.
      #
      # c - the command object the options are registered on.
      def self.specify_options(c)
        # The upper-case placeholders declare that each switch carries a value
        # (OptionParser-style switch spec assumed - confirm against the
        # command library in use).
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', "Database user's password (default: '')"
        c.option 'host', '--host HOST', 'Database host name'
        c.option 'category', '--category ID', 'ID of the category'
        c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
      end

      # Loads the libraries this importer depends on.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          sequel
          fileutils
          safe_yaml
        ])
      end

      # Process the import.
      #
      # options - Hash of options: 'dbname' and 'user' are required;
      #           'password' (default ''), 'host' (default 'localhost'),
      #           'category' (default 0, meaning no single category) and
      #           'prefix' (default 'jos_', only reached when this method is
      #           called without going through validate) are optional.
      def self.process(options)
        dbname       = options.fetch('dbname')
        user         = options.fetch('user')
        pass         = options.fetch('password', '')
        host         = options.fetch('host', "localhost")
        # The category may arrive as a String; coerce it so the numeric
        # comparison below cannot raise and only an integer reaches the SQL.
        cid          = options.fetch('category', 0).to_i
        table_prefix = options.fetch('prefix', "jos_")

        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

        FileUtils.mkdir_p("_posts")

        category_filter =
          if cid > 0
            " AND `cn`.`catid` = '#{cid}' "
          else
            " AND `cn`.`catid` != '2' " # Filter out uncategorized content
          end

        # Reads <prefix>content joined with its category and author; only
        # items in state 1 or 2 (published and archived) are imported.
        query = [
          "SELECT `cn`.`title`, `cn`.`alias`, `cn`.`introtext`, CONCAT(`cn`.`introtext`,`cn`.`fulltext`) AS `content`, ",
          "`cn`.`created`, `cn`.`id`, `ct`.`title` AS `category`, `u`.`name` AS `author` ",
          "FROM `#{table_prefix}content` AS `cn` JOIN `#{table_prefix}categories` AS `ct` ON `cn`.`catid` = `ct`.`id` ",
          "JOIN `#{table_prefix}users` AS `u` ON `cn`.`created_by` = `u`.`id` ",
          "WHERE (`cn`.`state` = '1' OR `cn`.`state` = '2') ",
          category_filter
        ].join

        db[query].each do |post|
          title    = post[:title]
          slug     = post[:alias]
          date     = post[:created]
          author   = post[:author]
          category = post[:category]
          content  = post[:content]
          excerpt  = post[:introtext]
          name = format("%02d-%02d-%02d-%s.markdown", date.year, date.month, date.day, slug)

          # Front matter: drop empty fields, then serialise to YAML.
          # to_s comes before strip so a NULL introtext does not raise.
          data = {
            'layout'     => 'post',
            'title'      => title.to_s,
            'joomla_id'  => post[:id],
            'joomla_url' => slug,
            'date'       => date,
            'author'     => author,
            'excerpt'    => excerpt.to_s.strip,
            'category'   => category
          }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

          File.open("_posts/#{name}", "w") do |f|
            f.puts data
            f.puts "---"
            f.puts content
          end
        end
      end
    end
  end
end
@@ -0,0 +1,125 @@
1
module BuntoImport
  module Importers
    # Importer that splits a jrnl journal file into entries and writes one
    # post per entry under _posts.
    class Jrnl < Importer
      # Loads the libraries this importer depends on.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          time
          rubygems
          safe_yaml
        ])
      end

      # Declares the command-line options of this importer.
      #
      # c - the command object the options are registered on.
      def self.specify_options(c)
        c.option 'file', '--file FILENAME', 'Journal file (default: "~/journal.txt")'
        c.option 'time_format', '--time_format FORMAT', 'Time format of your journal (default: "%Y-%m-%d %H:%M")'
        c.option 'extension', '--extension EXT', 'Output extension (default: "md")'
        c.option 'layout', '--layout NAME', 'Output post layout (default: "post")'
      end

      # Reads a jrnl file and creates a new post for each entry.
      #
      # options - Hash of overrides:
      #           'file'        - path to input file
      #           'time_format' - the format used by the jrnl configuration
      #           'extension'   - the extension format of the output files
      #           'layout'      - explicitly set the layout of the output
      def self.process(options)
        file        = options.fetch('file', "~/journal.txt")
        time_format = options.fetch('time_format', "%Y-%m-%d %H:%M")
        extension   = options.fetch('extension', "md")
        layout      = options.fetch('layout', "post")

        # Width of a rendered timestamp; used to cut the dateline into its
        # date and title parts. Assumes the format renders at a fixed width.
        date_length = Time.now.strftime(time_format).length

        # convert relative to absolute if needed
        file = File.expand_path(file)

        abort "The jrnl file was not found. Please make sure '#{file}' exists. You can specify a different file using the --file switch." unless File.file?(file)

        # Entries are separated by blank lines. Runs of several blank lines
        # count as one separator, and empty chunks are skipped, so stray
        # newlines no longer produce an entry without a dateline.
        entries = File.read(file).split(/\n{2,}/).reject { |entry| entry.strip.empty? }

        entries.each do |entry|
          # lines[0] holds the date and title, the remaining lines the body
          lines = entry.split("\n")

          body     = get_post_content(lines)
          date     = get_date(lines[0], date_length)
          title    = get_title(lines[0], date_length)
          slug     = create_slug(title)
          filename = create_filename(date, slug, extension)
          meta     = create_meta(layout, title, date) # prepare YAML meta data

          write_file(filename, meta, body) # write to file
        end
      end

      # Strip body from jrnl entry.
      #
      # content - Array of the entry's lines, dateline first.
      #
      # Returns every line after the dateline joined with newlines, so bodies
      # spanning several lines are kept whole ("" when there is no body).
      def self.get_post_content(content)
        content.drop(1).join("\n")
      end

      # Strip timestamp from the dateline.
      #
      # content - the dateline String.
      # offset  - length of the timestamp.
      def self.get_date(content, offset)
        content[0, offset]
      end

      # Strip title from the dateline.
      #
      # content - the dateline String.
      # offset  - length of the timestamp; the title starts one character
      #           after it.
      def self.get_title(content, offset)
        content[offset + 1, content.length]
      end

      # Generate slug: lower-case, spaces to dashes, everything that is not a
      # word character or a dash removed.
      def self.create_slug(title)
        title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
      end

      # Generate filename in the form YYYY-MM-DD-slug.extension.
      def self.create_filename(date, slug, extension)
        "#{Time.parse(date).strftime("%Y-%m-%d")}-#{slug}.#{extension}"
      end

      # Prepare YAML meta data
      #
      # layout - name of the layout
      # title  - title of the entry
      # date   - date of entry creation
      #
      # Examples
      #
      #   create_meta("post", "Entry 1", "2013-01-01 13:00")
      #   # => "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n"
      #
      # Returns the meta data Hash converted to YAML
      def self.create_meta(layout, title, date)
        {
          'layout' => layout,
          'title'  => title,
          'date'   => Time.parse(date).strftime("%Y-%m-%d %H:%M %z")
        }.to_yaml
      end

      # Writes given data to file
      #
      # filename - name of the output file
      # meta     - YAML header data
      # body     - jrnl entry content
      #
      # Examples
      #
      #   write_file("2013-01-01-entry-1.md", "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n", "This is the first entry for my new journal")
      #
      # Writes file to _posts/filename
      def self.write_file(filename, meta, body)
        # Nothing else in this importer creates the output directory.
        Dir.mkdir("_posts") unless File.directory?("_posts")

        File.open("_posts/#{filename}", "w") do |f|
          f.puts meta
          f.puts "---\n\n"
          f.puts body
        end
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
module BuntoImport
  module Importers
    # Importer that converts the *.txt posts of a Marley data directory into
    # Markdown post files under _posts.
    class Marley < Importer
      # Checks the 'marley_data_dir' option.
      #
      # options - Hash of options.
      #
      # Raises ArgumentError when the given directory does not exist.
      def self.validate(options)
        if options['marley_data_dir'].nil?
          Bunto.logger.abort_with "Missing mandatory option --marley_data_dir."
        elsif !File.directory?(options['marley_data_dir'])
          raise ArgumentError, "marley dir '#{options['marley_data_dir']}' not found"
        end
      end

      # Returns a Hash of the regular expressions used to pick a Marley post
      # file apart, keyed by the part they match.
      def self.regexp
        { :id              => /^\d{0,4}-{0,1}(.*)$/,
          :title           => /^#\s*(.*)\s+$/,
          :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
          :published_on    => /.*\s+\(([0-9\/]+)\)$/,
          :perex           => /^([^\#\n]+\n)$/,
          :meta            => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
        }
      end

      # Loads the libraries this importer depends on; 'date' is needed to
      # parse the publication date found in a post's title line.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          fileutils
          safe_yaml
          date
        ])
      end

      # Declares the command-line options of this importer.
      #
      # c - the command object the options are registered on.
      def self.specify_options(c)
        c.option 'marley_data_dir', '--marley_data_dir DIR', 'The dir containing your marley data'
      end

      # Process the import.
      #
      # options - Hash of options; 'marley_data_dir' is the directory that is
      #           searched recursively for *.txt post files.
      #
      # Returns a String reporting how many posts were created.
      def self.process(options)
        marley_data_dir = options.fetch('marley_data_dir')

        FileUtils.mkdir_p "_posts"

        posts = 0
        Dir["#{marley_data_dir}/**/*.txt"].each do |path|
          # File.exists? no longer exists on current Rubies.
          next unless File.exist?(path)

          # copied over from marley's app/lib/post.rb
          file_content = File.read(path)
          meta_content = file_content.slice!(regexp[:meta])
          body = file_content.sub(regexp[:title], '').sub(regexp[:perex], '').strip

          # A title line may end in "(date)"; if so take title and date from
          # it, otherwise fall back to the plain title pattern. Captures are
          # read directly: calling to_s on scan's nested Array would yield
          # its inspected form (brackets and quotes) instead of the text.
          title, date_text = file_content.scan(regexp[:title_with_date]).first
          title = file_content[regexp[:title], 1] if title.nil?
          title = title.to_s.strip
          prerex = file_content[regexp[:perex], 1].to_s.strip
          published_on = publication_time(date_text, path)

          # NOTE(review): YAML.load runs on file content; confirm that
          # safe_yaml's safe mode is in effect for untrusted data.
          meta = meta_content ? YAML.load(meta_content[regexp[:meta], 1].to_s) : {}
          meta = {} unless meta.is_a?(Hash)
          meta['title'] = title
          meta['layout'] = 'post'

          formatted_date = published_on.strftime('%Y-%m-%d')
          post_name = File.dirname(path).split(%r{/}).last.gsub(/\A\d+-/, '')

          name = "#{formatted_date}-#{post_name}"
          File.open("_posts/#{name}.markdown", "w") do |out|
            out.puts meta.to_yaml
            out.puts "---\n"
            out.puts "\n#{prerex}\n\n" unless prerex.empty?
            out.puts body
          end
          posts += 1
        end
        "Created #{posts} posts!"
      end

      # Determines when a post was published.
      #
      # date_text - the date String taken from the title line, or nil.
      # path      - path of the post file.
      #
      # Returns the parsed Date, or the modification Time of the directory
      # containing the post when there is no parsable date.
      def self.publication_time(date_text, path)
        Date.parse(date_text.to_s)
      rescue ArgumentError
        File.mtime(File.dirname(path))
      end
    end
  end
end