immigrada 0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +40 -0
- data/bin/immigrada +31 -0
- data/lib/immigrada.rb +11 -0
- data/lib/immigrada/blogger_entries.rb +29 -0
- data/lib/immigrada/blogger_entry.rb +57 -0
- data/lib/immigrada/local_images_decorator.rb +41 -0
- data/lib/immigrada/markdown_entry.rb +37 -0
- data/lib/immigrada/version.rb +3 -0
- metadata +96 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bf0a37970ce652e6a71f0c330a59b834a2f34878
|
4
|
+
data.tar.gz: 4ec6a7f0edb1e28aa1fa051ff1831cbead4c823b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2107a90dbc855eb741958aba4d8c5ba1b8c454a97c54a26aeb3c8b21216dc27507fbc9d3dd013f67d281af89c0057de2945fd3e32969fcc54486050af7b3d36e
|
7
|
+
data.tar.gz: 63102c2ff049e2b4d6cb81a20eec62084e61c4e33b288841b2d3b23ad3e6cf1ac685311dc51f92b201f8518cab740d22fc8004a768cf880d78bb8a906757f156
|
data/README.md
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# Immigrada
|
2
|
+
|
3
|
+
## Description
|
4
|
+
|
5
|
+
A small script for converting entries from the [Blogger.com](https://draft.blogger.com/) blogging service to markdown files for the [Middleman](http://middlemanapp.com/) static site generator. It should also be possible to use the result with other site generators.
|
6
|
+
|
7
|
+
The script parses a Blogger Atom XML file for blog entries and saves each of them to a separate file in markdown format. It also downloads images from posts and replaces links to them with local paths.
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
### Exporting your blog
|
12
|
+
|
13
|
+
1. Sign in to your Blogger account;
|
14
|
+
1. Go to Settings - Other;
|
15
|
+
1. Choose "Export blog" in "Blog tools" section;
|
16
|
+
1. Download XML file with exported entries;
|
17
|
+
|
18
|
+
You can find more instructions [here](https://support.google.com/blogger/answer/97416?hl=en).
|
19
|
+
|
20
|
+
### Script execution
|
21
|
+
|
22
|
+
1. Install Ruby;
|
23
|
+
1. Clone this project and navigate to it;
|
24
|
+
1. Run `bundle install` to download dependencies;
|
25
|
+
1. Run `ruby bin/immigrada run -i blog.xml`, where "blog.xml" is the Blogger XML backup downloaded in the previous section.
|
26
|
+
|
27
|
+
## To Do
|
28
|
+
|
29
|
+
There are still a lot of things to do, feel free to contribute:
|
30
|
+
|
31
|
+
- Add to RubyGems;
|
32
|
+
- Resolve problems with line breaks when parsing 'pre' elements;
|
33
|
+
- Add status messages while processing;
|
34
|
+
- Add proper error handling;
|
35
|
+
- Add input file checks for correctness;
|
36
|
+
|
37
|
+
## License
|
38
|
+
|
39
|
+
Project is distributed under [The MIT License](http://opensource.org/licenses/MIT).
|
40
|
+
|
data/bin/immigrada
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'gli'
|
4
|
+
require_relative '../lib/immigrada'
|
5
|
+
|
6
|
+
include GLI::App

program_desc 'Utility for converting Blogger.com entries to markdown files
suitable for static site generators.'

version Immigrada::VERSION

desc 'Convert Blogger backup XML to markdown entries.'
command :run do |c|
  # -i/--input: path to the Blogger XML backup. No default value is declared,
  # so the option is nil when the user omits it.
  c.desc 'set path to Blogger XML backup file.'
  c.arg_name './blog.xml'
  c.flag [:i, :input]

  # -o/--output: directory that receives the generated files.
  c.desc 'set output directory.'
  c.arg_name './out'
  c.default_value './out'
  c.flag [:o, :output]

  c.action do |_global_options, options, _args|
    input = options[:i]

    # Fail early with an actionable message; otherwise the missing option
    # reaches File.read(nil) inside the library and surfaces as a TypeError.
    exit_now!('input file is required, use -i PATH') if input.nil?
    exit_now!("input file not found: #{input}") unless File.file?(input)

    Immigrada::BloggerEntries.new.process(input, options[:o])
  end
end

# GLI's run returns the exit status for the process.
exit run(ARGV)
|
data/lib/immigrada.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'reverse_markdown'
|
6
|
+
|
7
|
+
require_relative 'immigrada/blogger_entries'
|
8
|
+
require_relative 'immigrada/blogger_entry'
|
9
|
+
require_relative 'immigrada/markdown_entry'
|
10
|
+
require_relative 'immigrada/local_images_decorator'
|
11
|
+
require_relative 'immigrada/version'
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Immigrada
  ##
  # Class for loading and processing blog entries from Blogger backup file.
  class BloggerEntries
    # Processes each entry object and saves it to file in markdown format.
    #
    # Entries for which +post?+ is false are skipped. Every saved entry is a
    # MarkdownEntry extended with LocalImagesDecorator.
    #
    # input_file:: path to the Blogger backup XML file.
    # out_dir::    directory that receives the generated files; created
    #              (including missing parents) when it does not exist.
    #
    # Returns the array of loaded entry objects.
    def process(input_file, out_dir)
      entries = load(input_file)

      # Creating the directory does not depend on the entry, so do it once
      # instead of re-checking on every iteration. mkdir_p is a no-op when
      # the directory already exists.
      FileUtils.mkdir_p(out_dir)

      entries.each do |entry|
        next unless entry.post?

        MarkdownEntry.new(entry, out_dir).extend(LocalImagesDecorator).save
      end
    end

    private

    # Reads Blogger backup file and parses it for entries. Returns array
    # of entry objects.
    #
    # Namespaces are removed from the parsed document so that entries can be
    # addressed with the plain '/feed/entry' XPath.
    def load(file)
      document = Nokogiri::XML.parse(File.read(file))
      document.remove_namespaces!
      document.xpath('/feed/entry').map { |node| BloggerEntry.new(node) }
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Immigrada
  ##
  # Class for parsing post data from entry XML object.
  #
  # The wrapped object only needs to answer +search(selector)+ with a
  # collection that supports +text+, +first+ and +map+.
  class BloggerEntry
    def initialize(entry_xml)
      @entry_xml = entry_xml
    end

    # Returns string with entry title.
    def title
      @entry_xml.search('title').text
    end

    # Returns string with published date formatted as YYYY-MM-DD.
    #
    # Raises whatever Date.parse raises when the 'published' text is not a
    # parseable date.
    def published
      raw_date = @entry_xml.search('published').text
      Date.parse(raw_date).strftime('%Y-%m-%d')
    end

    # Returns array of entry tags: the 'term' attribute of every category
    # element that uses the Blogger atom namespace scheme.
    def tags
      @entry_xml
        .search("category[@scheme='http://www.blogger.com/atom/ns#']")
        .map { |category| category['term'] }
    end

    # Returns string with entry slug.
    #
    # The slug is the file name (without extension) of the entry's
    # 'alternate' link. When the entry has no such link, or the link has no
    # href, a slug built from the title is returned instead of failing on nil.
    # NOTE(review): entries without an alternate link are presumably
    # unpublished drafts - confirm against a real export.
    def slug
      link = @entry_xml.search("link[@rel='alternate']").first
      href = link && link['href'].to_s
      return fallback_slug if href.nil? || href.empty?

      File.basename(href, File.extname(href))
    end

    # Returns string with entry HTML content.
    def content
      @entry_xml.search("content[@type='html']").text
    end

    # Check if entry is post: true when the text of the entry's 'id'
    # element(s) contains the substring 'post'.
    def post?
      @entry_xml.search('id').text.include?('post')
    end

    private

    # Builds a slug from the title: lowercased, every run of
    # non-alphanumeric characters collapsed to a single dash, dashes trimmed
    # from both ends. Returns 'untitled' when nothing usable remains.
    def fallback_slug
      slugified = title.downcase.gsub(/[^[:alnum:]]+/, '-').gsub(/\A-+|-+\z/, '')
      slugified.empty? ? 'untitled' : slugified
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Immigrada
  ##
  # Decorator module to replace remote images in post
  # with their downloaded copies. Extends MarkdownEntry class.
  #
  # Reads the @b_entry, @out_dir and @content instance variables of the
  # object it is mixed into.
  module LocalImagesDecorator
    # Replaces linked images in @content with local copies, then delegates
    # to the combine_post of the extended object.
    def combine_post
      post_dir = "#{@b_entry.published}-#{@b_entry.slug}"
      @content = insert_local_images(@content, post_dir)
      super
    end

    private

    # Finds images, downloads them, and replaces links to them
    # with local copies.
    #
    # Only <img src> elements that are direct children of an <a href> are
    # handled. The file that is downloaded is the link target (href), and the
    # whole link is replaced with a plain <img> pointing at
    # "/<post_filename>/<file name>".
    #
    # entry_html::    HTML of the post.
    # post_filename:: name of the per-post directory created under @out_dir.
    #
    # Returns the parsed (and modified) Nokogiri HTML document.
    def insert_local_images(entry_html, post_filename)
      html_doc = Nokogiri::HTML.parse(entry_html)
      post_img_dir = "#{@out_dir}/#{post_filename}"

      html_doc.xpath('//a[@href]/img[@src]').each do |img|
        link = img.parent
        img_url = link['href']

        # Created lazily so posts without images get no directory. mkdir_p
        # also creates missing parents and is a no-op on later iterations.
        FileUtils.mkdir_p(post_img_dir)

        img_filename = File.basename(img_url)
        download(img_url, "#{post_img_dir}/#{img_filename}")

        link.replace("<img src='/#{post_filename}/#{img_filename}'/>")
        # Unlink the original <img> from the link that was just replaced.
        img.remove
      end

      html_doc
    end

    # Helper method for downloading files.
    #
    # The URL comes from post HTML, so it is never handed to Kernel#open
    # (which would treat a leading '|' as a command to run). Only URIs that
    # open-uri itself can read are accepted.
    #
    # The remote content is read completely before the local file is
    # written, so a failed download does not leave an empty file behind.
    #
    # Raises ArgumentError when +url+ is malformed or of an unsupported kind.
    # Returns the number of bytes written.
    def download(url, filename)
      uri = begin
        URI.parse(url)
      rescue URI::InvalidURIError
        nil
      end

      unless uri.is_a?(OpenURI::OpenRead)
        raise ArgumentError, "cannot download image from unsupported URL: #{url}"
      end

      data = uri.open('rb', &:read)
      File.binwrite(filename, data)
    end
  end
end
|
41
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Immigrada
  ##
  # Class for generation markdown file with post from entry object.
  class MarkdownEntry
    # b_entry:: entry object answering title, published, tags, slug
    #           and content.
    # out_dir:: directory the markdown file is written to.
    def initialize(b_entry, out_dir)
      @b_entry = b_entry
      @out_dir = out_dir
      @content = @b_entry.content
    end

    # Saves post contents to file and prints a confirmation line.
    #
    # The post text is built before the file is opened, so an error while
    # combining the post does not leave a truncated file behind.
    def save
      filename = combine_filename
      path = File.join(@out_dir, filename)
      File.write(path, combine_post)
      puts "Post '#{@b_entry.title}' saved to: #{filename}"
    end

    # Returns string with post: frontmatter and content.
    #
    # The frontmatter is assembled line by line and the converted markdown
    # is appended untouched apart from trimming leading blank lines and
    # trailing whitespace. Indentation and blank lines inside the body are
    # significant in markdown (code blocks, paragraph breaks) and are kept.
    #
    # params:: unused; kept so existing callers that pass a hash keep working.
    def combine_post(params = {})
      front_matter = [
        '---',
        "title: #{@b_entry.title}",
        "date: #{@b_entry.published}",
        "tags: #{@b_entry.tags.join(',')}",
        '---'
      ].join("\n")

      body = ReverseMarkdown.convert(@content).to_s
                            .sub(/\A(?:[ \t]*\n)+/, '')
                            .rstrip

      "#{front_matter}\n#{body}\n"
    end

    # Returns string with filename combined from date and slug.
    def combine_filename
      "#{@b_entry.published}-#{@b_entry.slug}.markdown"
    end
  end
end
|
37
|
+
|
metadata
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: immigrada
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.2'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Michail Ovchinnikov
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-08-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: reverse_markdown
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: gli
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: "Script parses Blogger Atom XML file for blog entries and \n saves
|
56
|
+
each of them to separate file in markdown format. \n It also
|
57
|
+
downloads images from posts and replaces links to \n them with
|
58
|
+
local paths."
|
59
|
+
email: michail@ovchinnikov.cc
|
60
|
+
executables:
|
61
|
+
- immigrada
|
62
|
+
extensions: []
|
63
|
+
extra_rdoc_files: []
|
64
|
+
files:
|
65
|
+
- README.md
|
66
|
+
- bin/immigrada
|
67
|
+
- lib/immigrada.rb
|
68
|
+
- lib/immigrada/blogger_entries.rb
|
69
|
+
- lib/immigrada/blogger_entry.rb
|
70
|
+
- lib/immigrada/local_images_decorator.rb
|
71
|
+
- lib/immigrada/markdown_entry.rb
|
72
|
+
- lib/immigrada/version.rb
|
73
|
+
homepage: https://github.com/movch/immigrada
|
74
|
+
licenses: []
|
75
|
+
metadata: {}
|
76
|
+
post_install_message:
|
77
|
+
rdoc_options: []
|
78
|
+
require_paths:
|
79
|
+
- lib
|
80
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
requirements: []
|
91
|
+
rubyforge_project: immigrada
|
92
|
+
rubygems_version: 2.4.8
|
93
|
+
signing_key:
|
94
|
+
specification_version: 4
|
95
|
+
summary: Utility for migration from Blogger.com to static blogs generator.
|
96
|
+
test_files: []
|