news2kindle 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/bin/test-generator +1 -0
- data/lib/news2kindle/dup_checker.rb +1 -1
- data/lib/news2kindle/generator/nikkei-paid.rb +14 -17
- data/lib/news2kindle/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b6359f3d5bd815e2288ae183bdf574a429f64318
|
4
|
+
data.tar.gz: 746ab24028ae050952b3b84a58d78ec8b0e7230c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45d90ea6ee619e73aad9c61ad2cdcca1ecd5ef3493553618bb8ba79e534dc5582b5a6bd62f9c39e20a6420345537a886b1f16025b0dfa01162cce7875998d5ef
|
7
|
+
data.tar.gz: 4a5fc8651f8163b566191c048fe49387599db5b6e6e5c381d68762fb5ed26a6137bb368a52dd5d5eadfc6adb6bedf5d6c151958d5900073a96f4b34273dfd676
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
news2kindle (0.
|
4
|
+
news2kindle (0.2.0)
|
5
5
|
dropbox_api
|
6
6
|
kindlegen
|
7
7
|
mail
|
@@ -55,7 +55,7 @@ GEM
|
|
55
55
|
mime-types (3.1)
|
56
56
|
mime-types-data (~> 3.2015)
|
57
57
|
mime-types-data (3.2016.0521)
|
58
|
-
mini_mime (0.
|
58
|
+
mini_mime (1.0.0)
|
59
59
|
mini_portile2 (2.3.0)
|
60
60
|
minitest (5.10.3)
|
61
61
|
mongo (2.4.3)
|
data/bin/test-generator
CHANGED
@@ -55,28 +55,25 @@ module News2Kindle
|
|
55
55
|
# scraping top news
|
56
56
|
#
|
57
57
|
toc_top = ['TOP NEWS']
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
toc_top << [canonical( a.text.strip ), uri]
|
63
|
-
end
|
58
|
+
(agent.page / '#JSID_baseRefreshNxTop2 h3 a').each do |a|
|
59
|
+
uri = a.attr('href')
|
60
|
+
next if News2Kindle::DupChecker.dup?(uri)
|
61
|
+
toc_top << [canonical(a.text.strip), uri]
|
64
62
|
end
|
65
63
|
toc << toc_top
|
66
64
|
|
67
65
|
#
|
68
66
|
# scraping all categories
|
69
67
|
#
|
70
|
-
(agent.page / 'div.
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
end
|
68
|
+
(agent.page / 'div.m-miM11_box').each do |genre|
|
69
|
+
headline = genre / 'div.m-headline h3'
|
70
|
+
toc_cat = [headline.text]
|
71
|
+
agent.get((headline / 'a').attr('href'))
|
72
|
+
(agent.page / '#CONTENTS_MAIN h3 a').each do |article|
|
73
|
+
uri = article.attr('href')
|
74
|
+
next unless article.attr('href') =~ %r|^/article/|
|
75
|
+
next if News2Kindle::DupChecker.dup?(uri)
|
76
|
+
toc_cat << [canonical(article.text), uri]
|
80
77
|
end
|
81
78
|
toc << toc_cat
|
82
79
|
end
|
@@ -150,7 +147,7 @@ module News2Kindle
|
|
150
147
|
begin
|
151
148
|
#puts "getting html #{aid}#{sub}"
|
152
149
|
retry_loop( 5 ) do
|
153
|
-
agent.get(
|
150
|
+
agent.get("#{TOP}/news/print-article/?ng=#{aid}")
|
154
151
|
html = agent.page.root
|
155
152
|
sleep 1
|
156
153
|
end
|
data/lib/news2kindle/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: news2kindle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TADA Tadashi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-11-
|
11
|
+
date: 2017-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: kindlegen
|