cnblog2jekyll 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 94aedbf343531cfd933700a5c6b8288555999f17
4
+ data.tar.gz: 53075220909ac4e9c00e0a105c597f123b7ec463
5
+ SHA512:
6
+ metadata.gz: f92488233e70a21000f7c09aae4899af25bfbce0f32dc502336bb150c5d9c56e6879717215135ec3d34972ce2506ef3dbdd6bc2d76b1e7a8cea4282674e28ffb
7
+ data.tar.gz: 821b5c7ed0b6345f6f1de22155beba42693dc4280f7ec561aa19635e520b44011ff8a0673ba5ceaa1089d2fce8d6cd9847b9d64d392dbc918f6c447651a88c57
@@ -0,0 +1,35 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /vendor/bundle
26
+ /lib/bundler/man/
27
+
28
+ # for a library or gem, you might want to ignore these files since the code is
29
+ # intended to run in multiple environments; otherwise, check them in:
30
+ # Gemfile.lock
31
+ # .ruby-version
32
+ # .ruby-gemset
33
+
34
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
35
+ .rvmrc
@@ -0,0 +1,13 @@
1
+ # Contributor Code of Conduct
2
+
3
+ As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
4
+
5
+ We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, age, or religion.
6
+
7
+ Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.
8
+
9
+ Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.
10
+
11
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
12
+
13
+ This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/)
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'nokogiri'
4
+ gem 'watir-webdriver'
5
+ gem 'headless'
6
+ gem 'stringex'
7
+
8
+ group :development do
9
+ gem 'pry'
10
+ end
11
+
12
+ # Specify your gem's dependencies in cnblog2jekyll.gemspec
13
+ gemspec
@@ -0,0 +1,47 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ cnblog2jekyll (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ childprocess (0.5.6)
10
+ ffi (~> 1.0, >= 1.0.11)
11
+ coderay (1.1.0)
12
+ ffi (1.9.8)
13
+ headless (1.0.2)
14
+ method_source (0.8.2)
15
+ mini_portile (0.6.2)
16
+ multi_json (1.11.0)
17
+ nokogiri (1.6.6.2)
18
+ mini_portile (~> 0.6.0)
19
+ pry (0.10.1)
20
+ coderay (~> 1.1.0)
21
+ method_source (~> 0.8.1)
22
+ slop (~> 3.4)
23
+ rake (10.4.2)
24
+ rubyzip (1.1.7)
25
+ selenium-webdriver (2.45.0)
26
+ childprocess (~> 0.5)
27
+ multi_json (~> 1.0)
28
+ rubyzip (~> 1.0)
29
+ websocket (~> 1.0)
30
+ slop (3.6.0)
31
+ stringex (2.5.2)
32
+ watir-webdriver (0.7.0)
33
+ selenium-webdriver (>= 2.45)
34
+ websocket (1.2.1)
35
+
36
+ PLATFORMS
37
+ ruby
38
+
39
+ DEPENDENCIES
40
+ bundler (~> 1.9)
41
+ cnblog2jekyll!
42
+ headless
43
+ nokogiri
44
+ pry
45
+ rake (~> 10.0)
46
+ stringex
47
+ watir-webdriver
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Yanying Wang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 YanyingWang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,165 @@
1
+ # Cnblog2jekyll
2
+ Export cnblog's posts to jekyll(把博客园的文章导入到Jekyll)
3
+
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'cnblog2jekyll'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install cnblog2jekyll
20
+
21
+
22
+ ## 使用说明
23
+
24
+
25
+ ### 该脚本的作用
26
+
27
+ * 该脚本适用于想要把个人博客从cnblog迁移到jekyll的用户.
28
+ * 该脚本可以抓取cnblog的博客文章然后转化成jekyll可读取的格式。
29
+ * 新生成的文件在用户home目录的_posts/cnblogs下。
30
+
31
+
32
+
33
+ ### 注意
34
+
35
+ * 如果文章设置了访问密码, 导出时请暂时取消密码, 否则无法导入相关随笔.
36
+ * 个人博客不同的主题导致了html/css树结构的不同,使用此RubyGem前,请先切换到elf主题,然后等待数分钟生效。
37
+ * 转换后的随笔分类名字统一为小写字母。
38
+ * 如果分类名字为中文的, 请尽量在转换前将分类名字更换为英文, 并且等待10分钟左右使页面生效.否则, 转换后的分类标签将额外附加提供一个"汉语拼音"的分类名字。
39
+
40
+
41
+
42
+ ### 系统运行环境
43
+
44
+ * 使用此RubyGem的系统环境是Linux系统(因为该Gem是基于Ubuntu系统而写的,亦只在Ubuntu下面做过测试)。
45
+ * Ubuntu系统安装依赖, 请执行如下命令:
46
+ ```shell
47
+ aptitude install xvfb firefox
48
+ ```
49
+
50
+
51
+ ### 使用方法
52
+ ```shell
53
+ gem install cnblog2jekyll
54
+ pry # 或者 irb
55
+ require 'cnblog2jekyll'
56
+ ```
57
+ 或者
58
+ ```shell
59
+ git clone https://github.com/yanyingwang/cnblog2jekyll.git
60
+ cd cnblog2jekyll
61
+ ./bin/console
62
+ ```
63
+
64
+
65
+ **配置用户名**
66
+
67
+ `Cnblog2jekyll.username = 'yywang'`
68
+
69
+ 这里的yywang应该替换成你自己的博客园的用户名,也就是你的博客园主页链接http://cnblogs.com/username处的用户名。
70
+
71
+
72
+
73
+
74
+ **抓取全部文章并且生成jekyll兼容文章写入本地**
75
+
76
+ `Cnblog2jekyll.generate_markdown_all`
77
+
78
+ 命令输出结果示例:
79
+ ```shell
80
+ [3] pry(main)> Cnblog2jekyll.generate_markdown_all
81
+ article links: http://www.cnblogs.com/5211sss1/p/42519.html
82
+ generate file: /home/yanying/_posts/cnblogs/2015-04-05-测试.markdown
83
+ ```
84
+
85
+
86
+
87
+
88
+ **article_links方法**
89
+
90
+ `Cnblog2jekyll.article_links`
91
+
92
+ 此方法会输出所有文章的链接,同时亦会生成抓取每一个文章的方法,和生成本地jekyll兼容文件的方法。
93
+ 命令输出结果示例:
94
+ ```shell
95
+ [2] pry(main)> Cnblog2jekyll.article_links
96
+ ["http://www.cnblogs.com/yywang/articles/4427313.html"]
97
+ ```
98
+
99
+
100
+
101
+ **查看抓取文章的方法**
102
+
103
+ `Cnblog2jekyll.methods.grep /^article/`
104
+
105
+ 命令输出结果示例:
106
+ ```shell
107
+ [4] pry(main)> Cnblog2jekyll.methods.grep /^article/
108
+ => [:article_links,
109
+ :article_4394519_html,
110
+ :article_4276132_html,
111
+ :article_4183562_html,
112
+ :article_4145271_html,
113
+ :article_4119997_html,
114
+ :article_4116020_html,
115
+ :article_4114471_html,
116
+ :article_4093537_html,
117
+ :article_4068008_html,
118
+ :article_4060830_html,
119
+ :article_4058168_html]
120
+ ```
121
+
122
+
123
+ **抓取单个文章**
124
+
125
+ `Cnblog2jekyll.article_4427313_html`
126
+
127
+ 根据上面得到的抓取文章的方法,调用相应方法抓取需要的文章。
128
+
129
+ 命令输出结果示例:
130
+ ```shell
131
+ [3] pry(main)> Cnblog2jekyll.article_4427313_html
132
+ => {:title=>"这是一篇测试文章", :date=>"2015-04-14 23:33", :category=>[], :content=>"<p>这是一篇测试用的文章。</p>\n\n<p>line1</p>\n\n<p>line2</p>\n"}
133
+ ```
134
+
135
+
136
+
137
+ **查看生成本地jekyll兼容文章的调用方法**
138
+
139
+ `Cnblog2jekyll.methods.grep /^generate/`
140
+
141
+
142
+
143
+ **生成jekyll单个文件写入本地的方法**
144
+
145
+ `Cnblog2jekyll.generate_markdown_4427313_html`
146
+
147
+ 根据上面得到的生成本地jekyll兼容文件的方法,调用相应方法生成需要的文件。
148
+
149
+
150
+
151
+
152
+
153
+ ## Development
154
+
155
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
156
+
157
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
158
+
159
+ ## Contributing
160
+
161
+ 1. Fork it ( https://github.com/[my-github-username]/cnblog2jekyll/fork )
162
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
163
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
164
+ 4. Push to the branch (`git push origin my-new-feature`)
165
+ 5. Create a new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "cnblog2jekyll"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+
11
+ require "pry"
12
+ Pry.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for cnblog2jekyll.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'cnblog2jekyll/version'

Gem::Specification.new do |spec|
  spec.name          = "cnblog2jekyll"
  spec.version       = Cnblog2jekyll::VERSION
  spec.authors       = ["YanyingWang"]
  spec.email         = ["yanyingwang1@gmail.com"]
  spec.summary       = %q{cnblog2jekyll}
  spec.description   = %q{Export cnblog's posts to jekyll}
  spec.homepage      = "https://github.com/yanyingwang/cnblog2jekyll"
  spec.license       = "MIT"

  # Ship everything tracked by git except test/spec/feature directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Runtime dependencies: lib/cnblog2jekyll.rb requires all four of these at
  # load time, so they must be installed together with the gem. Listing them
  # only in the Gemfile does not help users who run `gem install cnblog2jekyll`.
  spec.add_runtime_dependency "nokogiri"
  spec.add_runtime_dependency "watir-webdriver"
  spec.add_runtime_dependency "headless"
  spec.add_runtime_dependency "stringex"

  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
end
@@ -0,0 +1,113 @@
1
+ require "cnblog2jekyll/version"
2
+ require 'open-uri'
3
+ require 'nokogiri'
4
+ require 'watir-webdriver'
5
+ require 'headless'
6
+ require 'stringex'
7
+
8
# FileUtils is used by generate_markdown; require it explicitly instead of
# relying on another library having loaded it as a side effect.
require 'fileutils'

# Scrapes the posts of a cnblogs.com blog and writes each one to a
# Jekyll-compatible file under ~/_posts/cnblogs.
#
# Usage:
#   Cnblog2jekyll.username = 'yywang'
#   Cnblog2jekyll.generate_markdown_all
module Cnblog2jekyll
  class << self
    # The cnblogs.com account name (the path segment of cnblogs.com/<username>).
    attr_accessor :username

    # Defines the memoizing readers :archive_links and :article_links.
    # Each reader returns its cached instance variable when it is set and
    # otherwise calls the matching get_* method, which populates the cache.
    [:archive_links, :article_links].each do |method_name|
      define_method method_name do
        instance_variable_get("@#{method_name}") || send("get_#{method_name}")
      end
    end

    # Collects the URL of every article listed on the blog's archive pages.
    #
    # As a side effect, two singleton methods are defined per article, named
    # after the last two path components of its URL (e.g. ".../4427313.html"
    # gives article_4427313_html and generate_markdown_4427313_html).
    #
    # @return [Array<String>] the article URLs (also cached in @article_links)
    def get_article_links
      @article_links = archive_links.flat_map do |archive_link|
        # The link was scraped from a remote page, so go through URI#open
        # (provided by open-uri) rather than Kernel#open: Kernel#open would run
        # a subprocess for a string starting with "|" or open a local file.
        # The block form also closes the downloaded stream.
        page = URI.parse(archive_link).open { |io| Nokogiri::HTML(io) }
        page.css('a.entrylistItemTitle').map { |anchor| anchor['href'] }
      end

      @article_links.each do |article_link|
        suffix = article_link.split(/[\/.]/).last(2).join("_")

        define_singleton_method("article_#{suffix}") { article(article_link) }
        define_singleton_method("generate_markdown_#{suffix}") { generate_markdown(article_link) }
      end

      @article_links
    end

    # Fetches every article and writes one Jekyll file per article.
    #
    # @return [Array<String>] the article URLs that were processed
    def generate_markdown_all
      article_links.each { |al| generate_markdown(al) }
    end

    private

    # Loads +url+ in a browser running on a virtual display and yields the
    # parsed HTML. The browser and the display are released even when loading
    # or the block raises.
    #
    # @param url [String] address to load
    # @yieldparam html [Nokogiri::HTML::Document] the rendered page
    # @return [Object] whatever the block returns
    def with_browser(url)
      headless = Headless.new
      headless.start
      begin
        browser = Watir::Browser.start(url)
        begin
          yield Nokogiri::HTML.parse(browser.html)
        ensure
          browser.close
        end
      ensure
        headless.destroy
      end
    end

    # Reads the blog's front page and keeps the sidebar links whose href
    # contains "archive".
    #
    # @return [Array<String>] archive page URLs (also cached in @archive_links)
    # @raise [TypeError] when username has not been set to a String
    def get_archive_links
      # String concatenation with nil already raised TypeError here; keep the
      # exception type but say what is actually wrong.
      unless @username.respond_to?(:to_str)
        raise TypeError, "Cnblog2jekyll.username must be set to a String before fetching links"
      end

      @archive_links = with_browser('cnblogs.com/' + @username) do |html|
        html.css('div#blog-sidecolumn a')
            .map { |anchor| anchor['href'] }
            .select { |href| href =~ /archive/ }
      end
    end

    # Fetches one article page and extracts its fields.
    #
    # @param al [String] article URL
    # @return [Hash] :title (double quotes removed), :date, :category
    #   (lower-cased names; a name containing Han characters additionally
    #   contributes its String#to_url form) and :content (HTML of the post body)
    def article(al)
      with_browser(al) do |html|
        title = html.css('a#cb_post_title_url').text.delete('"')
        date = html.css('span#post-date').text

        # Drop the first and last line of the serialized body node (its
        # wrapping <div> tags). The slice is nil when the node is missing from
        # the page, so fall back to an empty list instead of raising.
        # NOTE: each_line keeps line terminators, so join("\n") doubles them;
        # this is kept as-is because generate_markdown removes blank lines.
        body_lines = html.css('div#cnblogs_post_body').to_s.each_line.to_a
        content = (body_lines[1...-1] || []).join("\n").gsub(/\r\n/, "\n")

        category = html.css('div#BlogPostCategory a').flat_map do |anchor|
          name = anchor.text
          name =~ /\p{Han}+/ ? [name.downcase, name.to_url] : [name.downcase]
        end

        { title: title, date: date, category: category, content: content }
      end
    end

    # Fetches the article at +al+ and writes it to
    # ~/_posts/cnblogs/<yyyy-mm-dd>-<title words joined by "-">.markdown,
    # then prints the source link and the generated path.
    #
    # @param al [String] article URL
    # @return [nil]
    def generate_markdown(al)
      art = article(al)

      slug = art[:title].scan(/[a-zA-Z0-9\p{Han}]+/).join("-")
      filename = art[:date].match(/....-..-../).to_s + "-" + slug + ".markdown"

      # NOTE(review): the gsub runs after interpolation, so besides removing
      # the heredoc indentation it also strips leading whitespace and blank
      # lines from the article content. Kept unchanged to preserve the output.
      content = <<-EOF.gsub(/^\s+/, "")
        ---
        layout: post
        title: "#{art[:title]}"
        date: "#{art[:date]} +0800"
        comments: true
        categories: [ #{art[:category].join(", ")} ]
        ---
        #{art[:content]}
      EOF

      dirname = File.join(Dir.home, "_posts/cnblogs")
      FileUtils.mkdir_p(dirname) # no-op when the directory already exists

      path = File.join(dirname, filename)
      File.write(path, content)

      puts "article links: #{al}", "generate file: #{path}"
    end
  end
end
113
+
@@ -0,0 +1,3 @@
1
module Cnblog2jekyll
  # Version of the gem; read by cnblog2jekyll.gemspec. Frozen so the shared
  # constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,85 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cnblog2jekyll
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - YanyingWang
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-04-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: Export cnblog's posts to jekyll
42
+ email:
43
+ - yanyingwang1@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - CODE_OF_CONDUCT.md
50
+ - Gemfile
51
+ - Gemfile.lock
52
+ - LICENSE
53
+ - LICENSE.txt
54
+ - README.md
55
+ - Rakefile
56
+ - bin/console
57
+ - bin/setup
58
+ - cnblog2jekyll.gemspec
59
+ - lib/cnblog2jekyll.rb
60
+ - lib/cnblog2jekyll/version.rb
61
+ homepage: https://github.com/yanyingwang/cnblog2jekyll
62
+ licenses:
63
+ - MIT
64
+ metadata: {}
65
+ post_install_message:
66
+ rdoc_options: []
67
+ require_paths:
68
+ - lib
69
+ required_ruby_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ requirements: []
80
+ rubyforge_project:
81
+ rubygems_version: 2.4.6
82
+ signing_key:
83
+ specification_version: 4
84
+ summary: cnblog2jekyll
85
+ test_files: []