flee_to_md 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/README.md +50 -0
- data/bin/flee_to_md +22 -0
- data/flee_to_md.gemspec +25 -0
- data/lib/flee_to_md.rb +138 -0
- metadata +143 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7613246f2d49ea4c2e501f142b1259cf8828ca70
|
4
|
+
data.tar.gz: be43b9076d3103d9dcb0a8801d2cbe2387501c37
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 775c34d0f86676a94589aaf9ffa56fdb30539baa69e6e52862f21ca84cd950f85136ce911e048db1876e34672dba9ffd57afa72e626a096e4cf41499014c9a0f
|
7
|
+
data.tar.gz: dbd8028790806c364e47adb69d5583e6fbbe627efdf9628a21b02ba15a9195dec413ae66ef73e3f7300f2026e277acd2f3a1d1fbcdd113915f9ae8b11114798c
|
data/.gitignore
ADDED
data/README.md
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# flee to md
|
2
|
+
|
3
|
+
helps convert a big xml file (like from squarespace) to markdown files
|
4
|
+
|
5
|
+
## installation
|
6
|
+
|
7
|
+
`gem install flee_to_md`
|
8
|
+
|
9
|
+
(you'll need [rubygems](https://rubygems.org/pages/download))
|
10
|
+
|
11
|
+
## usage
|
12
|
+
|
13
|
+
run `flee_to_md {your_xml_file}.xml`
|
14
|
+
|
15
|
+
## changelog
|
16
|
+
|
17
|
+
* **2013-04-10**, v0.1.0
|
18
|
+
* repackaged as a rubygem for easier installation / usage
|
19
|
+
* **2013-04-01**, v0.0.6
|
20
|
+
* better support for windows (I think)
|
21
|
+
* **2013-03-23**, v0.0.5
|
22
|
+
* downloads attachments
|
23
|
+
* **2013-03-21**, v0.0.4
|
24
|
+
* removed some of the escaping `\` characters that kramdown was adding
|
25
|
+
* removed the trailing hyphens in some filenames (where the title ends in a `->`-like character)
|
26
|
+
* **2013-03-20**, v0.0.3
|
27
|
+
* removed web interface ([too slow for (heroku's) love](http://www.youtube.com/watch?v=fiyROQNLhSU))
|
28
|
+
* added 1.8.7 compatibility
|
29
|
+
* **2013-03-20**, v0.0.2
|
30
|
+
* added web interface
|
31
|
+
* titles in single quotes
|
32
|
+
* **2013-03-19**, v0.0.1 first try
|
33
|
+
|
34
|
+
## roadmap
|
35
|
+
|
36
|
+
* maybe make sure it works with wordpress exports
|
37
|
+
* maybe support other output structures
|
38
|
+
|
39
|
+
## license
|
40
|
+
|
41
|
+
The MIT License (MIT)
|
42
|
+
Copyright (c) 2013 Max Jacobson
|
43
|
+
|
44
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
45
|
+
|
46
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
47
|
+
|
48
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
49
|
+
|
50
|
+
|
data/bin/flee_to_md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require File.expand_path('../../lib/flee_to_md', __FILE__)
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
# Command-line entry point.
#
# With no arguments the script behaves as if "-h" was passed. "-h" and "-v"
# print their message and exit; otherwise the first remaining argument is
# treated as the path of the XML export to convert.
ARGV.push "-h" if ARGV.length == 0
optparse = OptionParser.new do |opts|
  opts.on('-h', '--help', 'Display this screen') do
    puts "run: flee_to_md your_xml_file.xml"
    exit
  end
  opts.on('-v', '--version', 'Version Information') do
    puts "flee_to_md v#{Flee_to_md::VERSION}"
    exit
  end
end

optparse.parse!

filename = ARGV[0]

# Fail with a readable message instead of an Errno stack trace when the
# argument is missing or does not name a regular file.
unless filename && File.file?(filename)
  abort "flee_to_md: cannot read #{filename.inspect}\nrun: flee_to_md your_xml_file.xml"
end

# File.read opens, reads and closes the file in one call, so no handle is
# left open for the rest of the (potentially long) conversion.
xml = File.read(filename)
Flee_to_md::Blog.new(xml, filename).write
|
data/flee_to_md.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.expand_path("../lib/flee_to_md", __FILE__)
|
2
|
+
|
3
|
+
# Gem packaging metadata. Version and release date come from the constants
# defined in lib/flee_to_md.rb (required above this block).
Gem::Specification.new do |flee_to_md|
  flee_to_md.name = "flee_to_md"
  flee_to_md.version = Flee_to_md::VERSION
  flee_to_md.date = Flee_to_md::LAST_UPDATED
  flee_to_md.summary = "Helps convert a big xml file (like from squarespace) into separate markdown files (like for statamic)"
  # Resolve the README relative to this gemspec (not the current directory)
  # and use File.read so the file handle is closed again.
  flee_to_md.description = File.read(File.expand_path("../README.md", __FILE__))
  flee_to_md.authors = ["Max Jacobson"]
  flee_to_md.email = "max@maxjacobson.net"
  # No "./" prefix: with it, the packaged file list ends up containing both
  # "./bin/flee_to_md" and "bin/flee_to_md" (the latter added for the
  # executable).
  flee_to_md.files = Dir[
    '*.{md,gemspec}',
    '.gitignore',
    'bin/*',
    'lib/*'
  ]
  flee_to_md.require_paths = ["lib"]
  flee_to_md.executables = ["flee_to_md"]
  flee_to_md.homepage = "http://rubygems.org/gems/flee_to_md"
  flee_to_md.license = "MIT"
  flee_to_md.required_ruby_version = '>= 1.8.7'
  flee_to_md.add_runtime_dependency 'nokogiri'
  flee_to_md.add_runtime_dependency 'kramdown'
  flee_to_md.add_runtime_dependency 'ruby-progressbar'
end
|
data/lib/flee_to_md.rb
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'nokogiri' # helps parse xml
|
3
|
+
require 'kramdown' # markdown implementation
|
4
|
+
require 'ruby-progressbar' # for nice output
|
5
|
+
require 'open-uri'
|
6
|
+
|
7
|
+
module Flee_to_md
|
8
|
+
VERSION = "0.1.0"
|
9
|
+
LAST_UPDATED = "2013-04-10"
|
10
|
+
# A whole blog export: parses the XML into Page objects and writes them to a
# new folder as markdown posts plus downloaded attachments.
class Blog
  attr_accessor :pages

  # xml      - String contents of the export file.
  # filename - String path the XML came from; the output folder name is
  #            derived from it in #write.
  #
  # Builds one Page per <item> element, advancing a "Reading" progress bar.
  def initialize(xml, filename)
    @filename = filename
    document = Nokogiri::XML(normalize(xml))
    items = document.xpath("//item")
    prog = ProgressBar.create(:title => "Reading", :total => items.length)
    @pages = []
    items.each do |item|
      @pages.push Page.new(item)
      prog.increment
    end
  end

  # Irons out markup that was causing errors: every "data-image" occurrence
  # is rewritten to "src". Returns a new String.
  def normalize(xml)
    xml.gsub(/data-image/, 'src')
  end

  # Writes every page to disk and returns the folder that was created.
  #
  # The folder is the input filename without ".xml"; if that path already
  # exists a numeric prefix ("1-", "2-", ...) is added to its last component.
  # Pages with a non-empty attachment_url are downloaded into
  # "<folder>/attachments", all others become "<folder>/<date>-<slug>.md".
  def write
    @foldername = available_folder(@filename.sub(/\.xml\z/, ''))
    prog = ProgressBar.create(:title => "Writing", :total => @pages.length)
    Dir.mkdir @foldername
    Dir.mkdir File.join(@foldername, "attachments")
    @pages.each do |page|
      if page.attachment_url != "" # is an attachment
        write_attachment(page)
      else # is not an attachment
        write_post(page)
      end
      prog.increment
    end
    puts "Written to #{@foldername}/"
    @foldername
  end

  private

  # Returns the first path that does not exist yet: base itself, then base
  # with "1-", "2-", ... prefixed to its final component. Prefixing only the
  # final component keeps inputs such as "exports/blog.xml" working (the
  # numbered folder is created next to the original, not under "1-exports/").
  def available_folder(base)
    dir, name = File.split(base)
    candidate = base
    i = 1
    # File.exist? rather than File.exists?, which newer Rubies no longer have.
    while File.exist?(candidate)
      numbered = "#{i}-#{name}"
      candidate = dir == "." ? numbered : File.join(dir, numbered)
      i += 1
    end
    candidate
  end

  # Downloads page.attachment_url into the attachments folder, named after
  # the page title plus the URL's extension (if it has one).
  def write_attachment(page)
    extension = page.attachment_url[/(\.[\w\d]+)$/, 1].to_s
    target = File.join(@foldername, "attachments", "#{page.title}#{extension}")
    open_url(page.attachment_url) do |remote|
      # write, not puts: puts appends a newline when the payload does not end
      # in one, which corrupts binary downloads.
      File.open(target, "wb") { |file| file.write(remote.read) }
    end
  end

  # Writes one post as YAML front matter followed by its markdown body.
  def write_post(page)
    # drop the escaping backslashes kramdown adds before these characters
    body = front_matter(page) + page.markdown.gsub(/\\("|'|\[|\]|\:)/, '\1')
    File.open(File.join(@foldername, post_filename(page)), 'w') { |file| file.write(body) }
  end

  # Builds the "---" delimited header block, ending with a blank line.
  def front_matter(page)
    # Inside a single-quoted YAML scalar an apostrophe is written as two.
    quoted_title = page.title.gsub("'", "''")
    lines = ["---"]
    lines << "title: '#{quoted_title}'"
    lines << "date: #{page.date.strftime("%Y-%m-%d %H:%M:%S %z")}"
    lines << "categories: [#{page.categories.join(", ")}]" if page.categories.length > 0
    lines << "tags: [#{page.tags.join(", ")}]" if page.tags.length > 0
    lines << "link: #{page.link}" if page.type == "linkpost"
    lines << "---"
    lines.join("\n") + "\n\n"
  end

  # "<YYYY-MM-DD>-<slug>.md", where the slug is the lowercased title with
  # whitespace runs turned into hyphens and every other non-word character
  # removed. All trailing hyphens are stripped before the extension is added.
  def post_filename(page)
    slug = page.title.downcase.gsub(/\s+/, '-').gsub(/[^-\w\d]/, '')
    "#{page.date.strftime('%Y-%m-%d')}-#{slug}".sub(/-+\z/, '') + ".md"
  end

  # Opens url for reading and yields the IO. Uses URI.open where open-uri
  # provides it, because bare Kernel#open no longer fetches URLs on current
  # Rubies; falls back to open on old Rubies where URI.open does not exist.
  #
  # NOTE(review): the URL comes straight from the export file and non-URL
  # strings fall through to Kernel#open -- only run this on exports you trust.
  def open_url(url, &block)
    if URI.respond_to?(:open)
      URI.open(url, &block)
    else
      open(url, &block)
    end
  end
end
|
76
|
+
|
77
|
+
# One <item> of the export: a post, a link post, or an attachment.
class Page
  attr_accessor :title, :permalink, :link, :date, :html, :markdown,
                :type, :post_id, :status, :tags, :categories,
                :attachment_url, :attachment_filename

  # item - the XML node for one <item> (as returned by Nokogiri's xpath).
  #
  # Reads the item's fields into attributes. type starts as the item's
  # wp:post_type, becomes "linkpost" when a wp:postmeta/wp:meta_value is
  # present (stored in link), and finally "file" when wp:attachment_url is
  # present. <category> children are split into categories and tags by their
  # "domain" attribute.
  def initialize(item)
    @title = node_text(item, "title")
    @permalink = node_text(item, "link")
    @date = pubdate_to_ruby_time(node_text(item, "pubDate"))
    @html = strip_cdata(node_text(item, "content:encoded"))
    @markdown = Kramdown::Document.new(@html, :input => 'html').to_kramdown
    @type = node_text(item, "wp:post_type")
    @post_id = node_text(item, "wp:post_id")
    @status = node_text(item, "wp:status")

    link = node_text(item, "wp:postmeta/wp:meta_value")
    if link != ""
      @type = "linkpost"
      @link = link
    end

    @attachment_url = node_text(item, "wp:attachment_url")
    if @attachment_url != ""
      @attachment_filename = node_text(item, "wp:post_name")
      @type = "file"
    end

    @categories = []
    @tags = []
    item.xpath("category").each do |meta|
      proper_name = strip_cdata(meta.children.to_s)
      # meta["domain"] is nil (not an error) when the attribute is absent;
      # such elements are skipped.
      case meta["domain"].to_s # "category" or "post_tag"
      when "category" then @categories.push proper_name
      when "post_tag" then @tags.push proper_name
      end
    end
  end

  # Converts a pubDate string such as "Tue, 19 Mar 2013 10:05:07 +0000" to a
  # Time.
  #
  # str - String containing weekday, day (1 or 2 digits), month name, year,
  #       HH:MM:SS and a numeric +hhmm / -hhmm offset.
  #
  # Returns a Time carrying the given offset on Ruby >= 1.9; on older Rubies
  # the offset is ignored and a local Time is returned.
  # Raises ArgumentError when str does not match the expected format.
  def pubdate_to_ruby_time(str)
    pattern = /\w{3}, (\d{1,2}) (\w{3}) (\d{4}) (\d{2}):(\d{2}):(\d{2}) ([+-]\d{4})/
    match = str.match(pattern)
    raise ArgumentError, "unrecognized pubDate: #{str.inspect}" if match.nil?

    day, month, year, hour, minute, second, zone = match.captures
    # Many posts carry an hour of 24, which is out of range for Time; it is
    # treated as hour 0 of the same day (assumed to mean midnight).
    hour = "0" if hour == "24"
    offset = zone.sub(/(\d{2})(\d{2})/, '\1:\2') # "+0100" -> "+01:00"

    if RUBY_VERSION.to_f >= 1.9
      Time.new(year.to_i, month, day.to_i, hour.to_i, minute.to_i, second.to_i, offset)
    else
      Time.local(year, month, day, hour, minute, second.to_i)
    end
  end

  private

  # Serialized children of the first-level match(es) for path under item,
  # or "" when nothing matches.
  def node_text(item, path)
    item.xpath(path).children.to_s
  end

  # Removes a surrounding "<![CDATA[" ... "]]>" wrapper when present and
  # returns the string unchanged otherwise, so empty or unwrapped content
  # yields a String rather than nil or a truncated value.
  def strip_cdata(str)
    str.sub(/\A<!\[CDATA\[/, '').sub(/\]\]>\z/, '')
  end
end
|
138
|
+
end
|
metadata
ADDED
@@ -0,0 +1,143 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: flee_to_md
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Max Jacobson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-04-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: kramdown
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ruby-progressbar
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: |+
|
56
|
+
# flee to md
|
57
|
+
|
58
|
+
helps convert a big xml file (like from squarespace) to markdown files
|
59
|
+
|
60
|
+
## installation
|
61
|
+
|
62
|
+
`gem install flee_to_md`
|
63
|
+
|
64
|
+
(you'll need [rubygems](https://rubygems.org/pages/download))
|
65
|
+
|
66
|
+
## usage
|
67
|
+
|
68
|
+
run `flee_to_md {your_xml_file}.xml`
|
69
|
+
|
70
|
+
## changelog
|
71
|
+
|
72
|
+
* **2013-04-10**, v0.1.0,
|
73
|
+
* repackaged as a rubygem for easier installation / usage
|
74
|
+
* **2013-04-01**, v0.0.6
|
75
|
+
* better support for windows (I think)
|
76
|
+
* **2013-03-23**, v0.0.5
|
77
|
+
* downloads attachments
|
78
|
+
* **2013-03-21**, v0.0.4
|
79
|
+
* removed some of the escaping `\` characters that kramdown was adding
|
80
|
+
* removed the trailing hyphens in some filenames (where the title ends in a -> like character)
|
81
|
+
* **2013-03-20**, v0.0.3
|
82
|
+
* removed web interface ([too slow for (heroku's) love](http://www.youtube.com/watch?v=fiyROQNLhSU))
|
83
|
+
* added 1.8.7 compatibility
|
84
|
+
* **2013-03-20**, v0.0.2
|
85
|
+
* added web interface
|
86
|
+
* titles in single quotes
|
87
|
+
* **2013-03-19**, v0.0.1 first try
|
88
|
+
|
89
|
+
## roadmap
|
90
|
+
|
91
|
+
* maybe make sure it works with wordpress exports
|
92
|
+
* maybe support other output structures
|
93
|
+
|
94
|
+
## license
|
95
|
+
|
96
|
+
The MIT License (MIT)
|
97
|
+
Copyright (c) 2013 Max Jacobson
|
98
|
+
|
99
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
100
|
+
|
101
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
102
|
+
|
103
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
104
|
+
|
105
|
+
|
106
|
+
email: max@maxjacobson.net
|
107
|
+
executables:
|
108
|
+
- flee_to_md
|
109
|
+
extensions: []
|
110
|
+
extra_rdoc_files: []
|
111
|
+
files:
|
112
|
+
- ./README.md
|
113
|
+
- ./flee_to_md.gemspec
|
114
|
+
- ./.gitignore
|
115
|
+
- ./bin/flee_to_md
|
116
|
+
- ./lib/flee_to_md.rb
|
117
|
+
- bin/flee_to_md
|
118
|
+
homepage: http://rubygems.org/gems/flee_to_md
|
119
|
+
licenses:
|
120
|
+
- MIT
|
121
|
+
metadata: {}
|
122
|
+
post_install_message:
|
123
|
+
rdoc_options: []
|
124
|
+
require_paths:
|
125
|
+
- lib
|
126
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - '>='
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: 1.8.7
|
131
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
132
|
+
requirements:
|
133
|
+
- - '>='
|
134
|
+
- !ruby/object:Gem::Version
|
135
|
+
version: '0'
|
136
|
+
requirements: []
|
137
|
+
rubyforge_project:
|
138
|
+
rubygems_version: 2.0.0
|
139
|
+
signing_key:
|
140
|
+
specification_version: 4
|
141
|
+
summary: Helps convert a big xml file (like from squarespace) into separate markdown
|
142
|
+
files (like for statamic)
|
143
|
+
test_files: []
|