flee_to_md 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7613246f2d49ea4c2e501f142b1259cf8828ca70
4
+ data.tar.gz: be43b9076d3103d9dcb0a8801d2cbe2387501c37
5
+ SHA512:
6
+ metadata.gz: 775c34d0f86676a94589aaf9ffa56fdb30539baa69e6e52862f21ca84cd950f85136ce911e048db1876e34672dba9ffd57afa72e626a096e4cf41499014c9a0f
7
+ data.tar.gz: dbd8028790806c364e47adb69d5583e6fbbe627efdf9628a21b02ba15a9195dec413ae66ef73e3f7300f2026e277acd2f3a1d1fbcdd113915f9ae8b11114798c
@@ -0,0 +1,4 @@
1
+ *.xml
2
+ *.zip
3
+ .DS_STORE
4
+ *.gem
@@ -0,0 +1,50 @@
1
+ # flee to md
2
+
3
+ helps convert a big XML export file (like one from Squarespace) into separate markdown files
4
+
5
+ ## installation
6
+
7
+ `gem install flee_to_md`
8
+
9
+ (you'll need [rubygems](https://rubygems.org/pages/download))
10
+
11
+ ## usage
12
+
13
+ run `flee_to_md {your_xml_file}.xml`
14
+
15
+ ## changelog
16
+
17
+ * **2013-04-10**, v0.1.0
18
+ * repackaged as a rubygem for easier installation / usage
19
+ * **2013-04-01**, v0.0.6
20
+ * better support for windows (I think)
21
+ * **2013-03-23**, v0.0.5
22
+ * downloads attachments
23
+ * **2013-03-21**, v0.0.4
24
+ * removed some of the escaping `\` characters that kramdown was adding
25
+ * removed the trailing hyphens in some filenames (where the title ends in a -> like character)
26
+ * **2013-03-20**, v0.0.3
27
+ * removed web interface ([too slow for (heroku's) love](http://www.youtube.com/watch?v=fiyROQNLhSU))
28
+ * added 1.8.7 compatibility
29
+ * **2013-03-20**, v0.0.2
30
+ * added web interface
31
+ * titles in single quotes
32
+ * **2013-03-19**, v0.0.1 first try
33
+
34
+ ## roadmap
35
+
36
+ * maybe make sure it works with wordpress exports
37
+ * maybe support other output structures
38
+
39
+ ## license
40
+
41
+ The MIT License (MIT)
42
+ Copyright (c) 2013 Max Jacobson
43
+
44
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
45
+
46
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
47
+
48
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
49
+
50
+
@@ -0,0 +1,22 @@
1
#!/usr/bin/env ruby

# Command-line entry point for flee_to_md.
#
# Usage: flee_to_md your_xml_file.xml
#
# Reads the given XML export, builds a Flee_to_md::Blog from it and writes the
# converted pages to a new folder named after the input file.

require File.expand_path('../../lib/flee_to_md', __FILE__)
require 'optparse'

# With no arguments at all, behave as if help had been requested.
ARGV.push "-h" if ARGV.empty?

optparse = OptionParser.new do |opts|
  opts.on('-h', '--help', 'Display this screen') do
    puts "run: flee_to_md your_xml_file.xml"
    exit
  end
  opts.on('-v', '--version', 'Version Information') do
    puts "flee_to_md v#{Flee_to_md::VERSION}"
    exit
  end
end

# parse! removes the recognised switches from ARGV, leaving the file name.
optparse.parse!

filename = ARGV[0]

# Fail with a readable message instead of a backtrace when the path is wrong.
unless filename && File.file?(filename)
  abort "flee_to_md: can't read #{filename.inspect}\nrun: flee_to_md your_xml_file.xml"
end

# File.read closes the handle for us (File.open(...).read left it open).
xml = File.read(filename)
Flee_to_md::Blog.new(xml, filename).write
@@ -0,0 +1,25 @@
1
require File.expand_path("../lib/flee_to_md", __FILE__)

# Gem packaging description for flee_to_md.
#
# The version and date come from the constants in lib/flee_to_md.rb, and the
# long description is the README itself.
Gem::Specification.new do |spec|
  spec.name        = "flee_to_md"
  spec.version     = Flee_to_md::VERSION
  spec.date        = Flee_to_md::LAST_UPDATED
  spec.summary     = "Helps convert a big xml file (like from squarespace) into separate markdown files (like for statamic)"
  # Resolve the README next to this file and let File.read close the handle.
  spec.description = File.read(File.expand_path("../README.md", __FILE__))
  spec.authors     = ["Max Jacobson"]
  spec.email       = "max@maxjacobson.net"
  # No "./" prefix: with it, bin/flee_to_md was packaged twice, once as
  # "./bin/flee_to_md" from this list and once as "bin/flee_to_md" from
  # `executables`. The globs are still resolved against the working directory,
  # so build the gem from the directory containing this file.
  spec.files = Dir[
    '*.{md,gemspec}',
    '.gitignore',
    'bin/*',
    'lib/*'
  ]
  spec.require_paths = ["lib"]
  spec.executables   = ["flee_to_md"]
  spec.homepage      = "http://rubygems.org/gems/flee_to_md"
  spec.license       = "MIT"
  spec.required_ruby_version = '>= 1.8.7'
  spec.add_runtime_dependency 'nokogiri'
  spec.add_runtime_dependency 'kramdown'
  spec.add_runtime_dependency 'ruby-progressbar'
end
@@ -0,0 +1,138 @@
1
+ require 'rubygems'
2
+ require 'nokogiri' # helps parse xml
3
+ require 'kramdown' # markdown implementation
4
+ require 'ruby-progressbar' # for nice output
5
+ require 'open-uri'
6
+
7
+ module Flee_to_md
8
+ VERSION = "0.1.0"
9
+ LAST_UPDATED = "2013-04-10"
10
# A whole exported blog.
#
# Parses the export XML into Page objects (see #initialize) and writes them
# out as one markdown file per post plus the downloaded attachments (see #write).
class Blog
  attr_accessor :pages

  # xml      - the contents of the export file, as a String
  # filename - the path of the export file; the output folder name is derived
  #            from it by dropping a trailing ".xml"
  def initialize(xml, filename)
    @filename = filename
    document = Nokogiri::XML normalize(xml)
    @pages = []
    items = document.xpath("//item")
    prog = ProgressBar.create(:title => "Reading", :total => items.length)
    items.each do |item|
      @pages.push Page.new(item)
      prog.increment
    end
  end

  # Irons out kinks in the raw XML that were causing errors downstream:
  # every occurrence of "data-image" is replaced with "src".
  #
  # Returns the adjusted XML String.
  def normalize(xml)
    xml.gsub(/data-image/, 'src')
  end

  # Writes every page to disk.
  #
  # Creates a fresh output folder (never reusing an existing one) with an
  # "attachments" subfolder. Pages with an attachment URL are downloaded into
  # "attachments"; all other pages become "<date>-<slug>.md" files with a
  # YAML front matter block.
  #
  # Returns the name of the folder that was written.
  def write
    @foldername = unused_foldername(@filename.gsub(/\.xml$/, ''))
    prog = ProgressBar.create(:title => "Writing", :total => @pages.length)
    Dir.mkdir @foldername
    Dir.mkdir "#{@foldername}/attachments"
    @pages.each do |page|
      if page.attachment_url != ""
        download_attachment(page)
      else
        write_post(page)
      end
      prog.increment
    end
    puts "Written to #{@foldername}/"
    @foldername
  end

  private

  # Returns stem itself if nothing exists at that path, otherwise the first
  # free "1-<name>", "2-<name>", ... The number is put in front of the last
  # path component, so "dir/export" becomes "dir/1-export", not "1-dir/export".
  def unused_foldername(stem)
    dir, base = File.split(stem)
    candidate = stem
    i = 1
    # File.exist? works on every Ruby; File.exists? was removed in Ruby 3.2.
    while File.exist?(candidate)
      numbered = "#{i}-#{base}"
      candidate = base == stem ? numbered : File.join(dir, numbered)
      i += 1
    end
    candidate
  end

  # Downloads page.attachment_url into the attachments folder, named after
  # the page title plus the extension found at the end of the URL (if any).
  def download_attachment(page)
    extension = page.attachment_url[/(\.[\w\d]+)$/, 1].to_s
    # A path separator in the title would point outside the attachments folder.
    filename = "#{page.title}#{extension}".gsub(/[\/\\]/, '-')
    # URI#open comes from open-uri. Unlike Kernel#open it cannot be tricked
    # into running a command by a string starting with "|", and it still
    # exists on Ruby 3.
    URI.parse(page.attachment_url).open do |remote|
      File.open("#{@foldername}/attachments/#{filename}", "wb") do |file|
        # write, not puts: puts appends a newline, which corrupts binary files.
        file.write remote.read
      end
    end
  end

  # Writes one non-attachment page as a markdown file with front matter.
  def write_post(page)
    # Drop some of the backslash escapes kramdown adds in front of quotes,
    # square brackets and colons.
    body = page.markdown.gsub(/\\("|'|\[|\]|\:)/, '\1')
    File.open("#{@foldername}/#{post_filename(page)}", 'w') do |file|
      file.write(front_matter(page) + body)
    end
  end

  # Builds the YAML front matter block, including the blank line after it.
  def front_matter(page)
    # Inside a single-quoted YAML scalar a literal quote is written as ''.
    title = page.title.gsub("'", "''")
    lines = ["---", "title: '#{title}'"]
    lines << "date: #{page.date.strftime('%Y-%m-%d %H:%M:%S %z')}"
    lines << "categories: [#{page.categories.join(', ')}]" unless page.categories.empty?
    lines << "tags: [#{page.tags.join(', ')}]" unless page.tags.empty?
    lines << "link: #{page.link}" if page.type == "linkpost"
    lines << "---"
    lines.join("\n") + "\n\n"
  end

  # Builds "<yyyy-mm-dd>-<slug>.md". The slug is the lower-cased title with
  # whitespace runs turned into hyphens, every character other than word
  # characters and hyphens removed, and all trailing hyphens stripped (titles
  # ending in something like "->" used to leave one behind).
  def post_filename(page)
    slug = page.title.downcase.gsub(/\s+/, '-').gsub(/[^-\w\d]/, '').sub(/-+\z/, '')
    name = page.date.strftime('%Y-%m-%d')
    name += "-#{slug}" unless slug.empty?
    "#{name}.md"
  end
end
76
+
77
# One <item> of the export: a regular post, a link post or an attachment.
#
# Every field is read from the item's child elements once, at construction.
class Page
  attr_accessor :title, :permalink, :link, :date, :html, :markdown,
                :type, :post_id, :status, :tags, :categories,
                :attachment_url, :attachment_filename

  # A pubDate such as "Wed, 10 Apr 2013 13:15:30 +0000". The day may have one
  # or two digits and the UTC offset may be positive or negative.
  PUBDATE_PATTERN = /\w{3}, (\d{1,2}) (\w{3}) (\d{4}) (\d{2}):(\d{2}):(\d{2}) ([+-]\d{4})/

  # item - the node of one <item> element; it must answer #xpath the way a
  #        Nokogiri node does.
  #
  # Sets type to "linkpost" when the item has a wp:postmeta/wp:meta_value
  # (its text becomes #link), and to "file" when it has a wp:attachment_url.
  def initialize(item)
    @title     = item.xpath("title").children.to_s
    @permalink = item.xpath("link").children.to_s
    @date      = pubdate_to_ruby_time(item.xpath("pubDate").children.to_s)
    @html      = strip_cdata(item.xpath("content:encoded").children.to_s)
    @markdown  = Kramdown::Document.new(@html, :input => 'html').to_kramdown
    @type      = item.xpath("wp:post_type").children.to_s
    @post_id   = item.xpath("wp:post_id").children.to_s
    @status    = item.xpath("wp:status").children.to_s

    link = item.xpath("wp:postmeta/wp:meta_value").children.to_s
    if link != ""
      @type = "linkpost"
      @link = link
    end

    @attachment_url = item.xpath("wp:attachment_url").children.to_s
    if @attachment_url != ""
      @attachment_filename = item.xpath("wp:post_name").children.to_s
      @type = "file"
    end

    @categories = []
    @tags = []
    item.xpath("category").each do |meta|
      proper_name = strip_cdata(meta.children.to_s)
      # The domain attribute is "category" or "post_tag"; anything else
      # (including a missing attribute) is ignored.
      case meta["domain"].to_s
      when "category" then @categories.push proper_name
      when "post_tag" then @tags.push proper_name
      end
    end
  end

  # Converts a pubDate string into a Time.
  #
  # str - text containing a date in the PUBDATE_PATTERN format
  #
  # Returns a Time carrying the offset from the string on Ruby >= 1.9; on
  # older Rubies the offset is dropped and the local zone is used.
  # Raises ArgumentError if str contains no recognisable pubDate.
  def pubdate_to_ruby_time(str)
    match = PUBDATE_PATTERN.match(str)
    raise ArgumentError, "unrecognized pubDate: #{str.inspect}" unless match

    day, month, year, hour, minute, second, offset = match.captures
    # Many exported posts carry an hour of 24, which Time rejects as out of
    # range. It is taken to mean midnight and mapped to hour 0 of the same
    # date; the date is not advanced.
    hour = "0" if hour == "24"
    # "+0100" -> "+01:00", the form Time.new expects.
    offset = offset.sub(/(\d{2})(\d{2})/, '\1:\2')

    if RUBY_VERSION.to_f >= 1.9
      Time.new(year.to_i, month, day.to_i, hour.to_i, minute.to_i, second.to_i, offset)
    else
      Time.local(year, month, day, hour, minute, second)
    end
  end

  private

  # Returns the text inside a "<![CDATA[ ... ]]>" wrapper, or str unchanged
  # when it is not wrapped. The previous fixed-position slice returned nil for
  # short or empty strings and chopped characters off unwrapped text.
  def strip_cdata(str)
    wrapped = /\A\s*<!\[CDATA\[(.*)\]\]>\s*\z/m.match(str)
    wrapped ? wrapped[1] : str
  end
end
138
+ end
metadata ADDED
@@ -0,0 +1,143 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: flee_to_md
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Max Jacobson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-04-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: kramdown
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: ruby-progressbar
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: |+
56
+ # flee to md
57
+
58
+ helps convert a big xml file (like from squarespace) to markdown files
59
+
60
+ ## installation
61
+
62
+ `gem install flee_to_md`
63
+
64
+ (you'll need [rubygems](https://rubygems.org/pages/download))
65
+
66
+ ## usage
67
+
68
+ run `flee_to_md {your_xml_file}.xml`
69
+
70
+ ## changelog
71
+
72
+ * **2013-04-10**, v0.1.0,
73
+ * repackaged as a rubygem for easier installation / usage
74
+ * **2013-04-01**, v0.0.6
75
+ * better support for windows (I think)
76
+ * **2013-03-23**, v0.0.5
77
+ * downloads attachments
78
+ * **2013-03-21**, v0.0.4
79
+ * removed some of the escaping `\` characters that kramdown was adding
80
+ * removed the trailing hyphens in some filenames (where the title ends in a -> like character)
81
+ * **2013-03-20**, v0.0.3
82
+ * removed web interface ([too slow for (heroku's) love](http://www.youtube.com/watch?v=fiyROQNLhSU))
83
+ * added 1.8.7 compatibility
84
+ * **2013-03-20**, v0.0.2
85
+ * added web interface
86
+ * titles in single quotes
87
+ * **2013-03-19**, v0.0.1 first try
88
+
89
+ ## roadmap
90
+
91
+ * maybe make sure it works with wordpress exports
92
+ * maybe support other output structures
93
+
94
+ ## license
95
+
96
+ The MIT License (MIT)
97
+ Copyright (c) 2013 Max Jacobson
98
+
99
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
100
+
101
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
102
+
103
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
104
+
105
+
106
+ email: max@maxjacobson.net
107
+ executables:
108
+ - flee_to_md
109
+ extensions: []
110
+ extra_rdoc_files: []
111
+ files:
112
+ - ./README.md
113
+ - ./flee_to_md.gemspec
114
+ - ./.gitignore
115
+ - ./bin/flee_to_md
116
+ - ./lib/flee_to_md.rb
117
+ - bin/flee_to_md
118
+ homepage: http://rubygems.org/gems/flee_to_md
119
+ licenses:
120
+ - MIT
121
+ metadata: {}
122
+ post_install_message:
123
+ rdoc_options: []
124
+ require_paths:
125
+ - lib
126
+ required_ruby_version: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - '>='
129
+ - !ruby/object:Gem::Version
130
+ version: 1.8.7
131
+ required_rubygems_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - '>='
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ requirements: []
137
+ rubyforge_project:
138
+ rubygems_version: 2.0.0
139
+ signing_key:
140
+ specification_version: 4
141
+ summary: Helps convert a big xml file (like from squarespace) into separate markdown
142
+ files (like for statamic)
143
+ test_files: []