news2kindle 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,90 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # scraping online.wsj.com (WSJ U.S. edition) for Kindlizer
4
+ #
5
+
6
+ require File.expand_path('../wsj-paid', __FILE__ )
7
+
8
module Kindlizer
  module Generator
    # Generator for the U.S. edition of the Wall Street Journal.
    #
    # Logs in with @wsj_id / @wsj_pw, collects article links from the home
    # page and from every section of the main navigation, and passes the
    # resulting table of contents to generate_contents.
    #
    # NOTE(review): @wsj_id, @wsj_pw, @dst_dir, canonical, retry_loop and
    # generate_contents are not defined in this file; presumably they are
    # provided by WsjPaid -- confirm.
    # NOTE(review): News2Kindle::Task looks generators up under
    # News2Kindle::Generator, while this class is declared in
    # Kindlizer::Generator -- confirm which namespace is intended.
    class WsjusPaid < WsjPaid
      TOP = 'http://online.wsj.com'
      LOGIN = "https://id.wsj.com/access/pages/wsj/us/login_standalone.html"

      # Matches absolute (online.wsj.com) and site-relative article links.
      ARTICLE_HREF = %r{\A(?:http://online\.wsj\.com)?/article/}

      # Navigation entries that are not scraped as sections.
      SKIPPED_SECTIONS = ['Home', 'Market Data', 'C-Suite'].freeze

      # Upper bound of articles collected per section.
      ARTICLES_PER_SECTION = 8

      # Scrapes the site and builds the book.
      #
      # opts:: Hash; opts[:now] must respond to strftime.
      #
      # Yields the path of the generated OPF file ("<@dst_dir>/wsj-paid.opf").
      # Re-raises whatever error a section fetch ends with after it has been
      # logged.
      def generate(opts)
        @now = opts[:now]
        # Use the instance variable just assigned; a bare `now` is not
        # defined anywhere in this file.
        @now_str = @now.strftime '%Y-%m-%d %H:%M'
        @title = "WSJ U.S."
        @lang = "en-US"
        FileUtils.cp("./resource/wsj-us.jpg", @dst_dir + "/wsj.jpg")

        agent = Mechanize::new
        agent.set_proxy(*ENV['HTTP_PROXY'].split(/:/)) if ENV['HTTP_PROXY']

        toc = []

        # Log in: fill the first form of the login page and post it to the
        # JSON endpoint, then follow the URL returned in the response.
        agent.get(LOGIN)
        form = agent.page.forms.first
        form.action = 'https://id.wsj.com/auth/submitlogin.json'
        form['username'] = @wsj_id
        form['password'] = @wsj_pw
        form.submit

        response = JSON.parse(agent.page.body)
        agent.get(response["url"])

        agent.get(TOP + "/home-page?_wsjregion=na,us&_homepage=/home/us")

        #
        # scraping top news
        #
        toc_top = ['TOP NEWS']
        (agent.page / "div.whatsNews ul.newsItem h2 a").each do |link|
          href = link.attr('href')
          toc_top << [canonical(link.text.strip), href] if ARTICLE_HREF =~ href
        end
        toc << toc_top

        #
        # scraping all categories
        #
        (agent.page.root / 'div.wsjMainNav li').each do |li|
          nav = (li / 'a').first
          next unless nav # navigation item without a link

          title = nav.text.strip
          next if SKIPPED_SECTIONS.include?(title)

          toc_cat = [canonical(title)]
          section_url = nav.attr('href')
          begin
            retry_loop(5) do
              agent.get(section_url)
              sleep 1
            end
          rescue
            # Log the URL that failed, then let the error propagate.
            News2Kindle.logger.error "cannot get #{section_url}."
            raise
          end

          # Section pages use one of two layouts; fall back to the second
          # selector when the first finds nothing.
          links = agent.page / "div.whatsNews ul.newsItem h2 a"
          links = agent.page / "div.headlineSummary ul.newsItem h2 a" if links.size == 0

          articles = links.select { |link| ARTICLE_HREF =~ link.attr('href') }
          articles.first(ARTICLES_PER_SECTION).each do |link|
            toc_cat << [canonical(link.text.strip), link.attr('href')]
          end
          toc << toc_cat
        end

        generate_contents(toc, agent)
        yield "#{@dst_dir}/wsj-paid.opf"
      end
    end
  end
end
@@ -0,0 +1,116 @@
1
+ # task controller
2
+ #
3
+ # Copyright (C) 2017 by TADA Tadashi <t@tdtds.jp>
4
+ # Distributed under GPL.
5
+ #
6
+ require 'pit'
7
+ require 'kindlegen'
8
+ require 'mail'
9
+ require 'dropbox_api'
10
+
11
# Reopens the Dropbox client to add an upload that sends a file piece by piece
# through an upload session.
class DropboxApi::Client
  # Uploads data to +dropbox_file+ using an upload session.
  #
  # The given block is called repeatedly and must return the next piece of
  # data; the upload ends as soon as the block returns nil or false.
  # The session is opened with an empty payload, every piece is appended,
  # and the session is finished with commit info using mode :add.
  #
  # Returns the value of upload_session_finish.
  def chunk_upload(dropbox_file)
    commit = DropboxApi::Metadata::CommitInfo.new('path'=>dropbox_file, 'mode'=>:add)
    session = upload_session_start('')
    while (piece = yield)
      upload_session_append_v2(session, piece)
    end
    upload_session_finish(session, commit)
  end
end
21
+
22
module News2Kindle
  # Runs one generator, converts its OPF output to a .mobi file with kindlegen
  # and delivers that file by mail and/or to Dropbox folders.
  class Task
    # Receivers starting with this prefix are Dropbox folders, not mail addresses.
    DROPBOX_PREFIX = 'dropbox:'

    # Number of bytes read from the mobi file per upload-session request.
    DROPBOX_CHUNK_SIZE = 10_000_000

    # name:: generator name such as "wsj-paid". The file
    #        news2kindle/generator/<name> is required and the class is looked
    #        up in News2Kindle::Generator by capitalizing each "-"-separated
    #        part and joining them ("wsj-paid" -> WsjPaid).
    def initialize(name)
      @name = name
      require "news2kindle/generator/#{@name}"
      @generator = News2Kindle::Generator.const_get(@name.split(/-/).map(&:capitalize).join)
    end

    # Generates the book in a temporary directory and delivers it.
    #
    # to::   one receiver or an (optionally nested) array of receivers; each
    #        is a mail address or "dropbox:<folder>".
    # from:: sender mail address.
    # opts:: options passed to the generator; opts[:email] holds the SMTP
    #        settings used for mail delivery.
    #
    # A failed mobi generation is logged and nothing is delivered.
    # NOTE(review): Dir.mktmpdir and Pathname need 'tmpdir' / 'pathname',
    # which this file does not require itself -- presumably loaded elsewhere.
    def run(to, from, opts)
      Dir.mktmpdir do |dir|
        @generator.new(dir).generate(opts) do |opf|
          file = "#{@name}.#{Time.now.strftime('%Y%m%d%H%M%S%2N')}.mobi"
          Kindlegen.run(opf, '-o', file, '-locale', 'ja')
          mobi = Pathname(opf).dirname + file
          if mobi.file?
            News2Kindle.logger.info "generated #{mobi} successfully."
            deliver([to].flatten, from, mobi, opts)
          else
            News2Kindle.logger.error 'failed mobi generation.'
          end
        end
      end
    end

    private

    # Splits the receivers into Dropbox folders and mail addresses and
    # delivers to both groups.
    def deliver(to_address, from_address, mobi, opts)
      to_dropbox, to_mail = to_address.partition { |address| address.start_with?(DROPBOX_PREFIX) }
      deliver_via_dropbox(to_dropbox, mobi)
      deliver_via_mail(to_mail, from_address, mobi, opts)
    end

    # Sends the mobi file as an attachment to every address in +to_address+.
    #
    # When opts[:email] has a truthy :user_name or :password, the real
    # credentials are taken from Pit ('news2kindle' entry).
    def deliver_via_mail(to_address, from_address, mobi, opts)
      return if to_address.empty?

      # Work on a copy so credentials are never written into the caller's hash.
      settings = opts[:email].dup
      if settings[:user_name] || settings[:password]
        account = Pit::get('news2kindle', require: {
          mail_user_name: 'your e-mail id',
          mail_password: 'your e-mail password'
        })
        settings[:user_name] = account[:mail_user_name]
        settings[:password] = account[:mail_password]
      end

      attachment_name = mobi.basename.to_s
      # Binary read: a mobi file is not text and must not be newline-translated.
      payload = mobi.binread

      Mail.defaults { delivery_method :smtp, settings }
      Mail.deliver do
        from from_address
        to to_address
        subject 'sent by news2kindle'
        body 'dummy text'
        attachments[attachment_name] = {
          :mime_type => 'application/octet-stream',
          :content => payload
        }
      end
      News2Kindle.logger.info "sent mails successfully."
    end

    # Uploads the mobi file into every Dropbox folder in +to_address+
    # (entries of the form "dropbox:<folder>").
    #
    # Best effort: any error is logged (details at debug level) and swallowed.
    def deliver_via_dropbox(to_address, mobi)
      return if to_address.empty?

      client = DropboxApi::Client.new(dropbox_token)
      to_address.each do |address|
        file = Pathname(address.sub(/\Adropbox:/, '')) + mobi.basename
        mobi.open('rb') do |f|
          client.chunk_upload(file) { f.read(DROPBOX_CHUNK_SIZE) }
        end
        News2Kindle.logger.info "saved to #{file} successfully."
      end
    rescue => e
      News2Kindle.logger.error "failed while saving to dropbox."
      News2Kindle.logger.debug e
      Array(e.backtrace).each { |line| News2Kindle.logger.debug line }
    end

    # Returns the Dropbox access token stored in Pit. When none is stored yet,
    # asks for app key, app secret and authorization code on the terminal,
    # obtains a token and saves it back to Pit.
    def dropbox_token
      auth = Pit::get('news2kindle')
      unless auth[:dropbox_token]
        print "Enter dropbox app key: "
        api_key = $stdin.gets.chomp

        print "Enter dropbox app secret: "
        api_secret = $stdin.gets.chomp

        authenticator = DropboxApi::Authenticator.new(api_key, api_secret)
        puts "\nGo to this url and click 'Authorize' to get the token:"
        puts authenticator.authorize_url

        print "Enter the token: "
        code = $stdin.gets.chomp

        auth[:dropbox_token] = authenticator.get_token(code).token
        Pit::set('news2kindle', data: auth)
      end
      auth[:dropbox_token]
    end
  end
end
@@ -0,0 +1,3 @@
1
module News2Kindle
  # Version string of the news2kindle gem; read by the gemspec.
  VERSION = "0.1.1"
end
@@ -0,0 +1,37 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "news2kindle/version"
5
+
6
# Gem specification for news2kindle.
Gem::Specification.new do |spec|
  # Identity
  spec.name = "news2kindle"
  spec.version = News2Kindle::VERSION
  spec.authors = ["TADA Tadashi"]
  spec.email = ["t@tdtds.jp"]

  # Description
  spec.summary = %q{scrape some news site and deliver to kindle}
  spec.description = %q{scrape some news site and deliver to kindle}
  spec.homepage = "https://github.com/tdtds/news2kindle"
  spec.license = "GPL"

  # Packaged files: everything tracked by git except test directories.
  tracked = `git ls-files -z`.split("\x0")
  spec.files = tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies without a version constraint, in declaration order.
  %w[kindlegen systemu mail mechanize nokogiri pit dropbox_api].each do |runtime_gem|
    spec.add_dependency runtime_gem
  end
  spec.add_dependency 'mongoid', '~> 6.1'

  # Development-only dependencies.
  %w[bundler rake rspec pry].each do |dev_gem|
    spec.add_development_dependency dev_gem
  end
end
@@ -0,0 +1,31 @@
1
+ ---
2
+ :sender: hoge@example.com
3
+ :email:
4
+ :address: smtp.sendgrid.net
5
+ :port: 587
6
+ :user_name: yes
7
+ :password: yes
8
+ :authentication: :plain
9
+ :mongodb_uri: mongodb://localhost:27017/news2kindle
10
+ :tasks:
11
+ newspaper:
12
+ :media:
13
+ - nikkei-free
14
+ - wsj-paid
15
+ :receiver:
16
+ - reader1@kindle.com
17
+ - reader2@kindle.com
18
+ - dropbox:/news
19
+ watch:
20
+ :media:
21
+ - internet-watch
22
+ :receiver:
23
+ - reader1@kindle.com
24
+ - dropbox:/watch
25
+ diary:
26
+ :media:
27
+ - tdiary
28
+ :option:
29
+ :tdiary_top: http://sho.tdiary.net/
30
+ :receiver:
31
+ - reader2@kindle.com
@@ -0,0 +1,27 @@
1
+ * {
2
+ margin: 0px;
3
+ padding: 0px;
4
+ text-indent: 0px;
5
+ line-height: 150%;
6
+ }
7
+
8
+ h1 {
9
+ font-size: 150%;
10
+ font-weight: bold;
11
+ }
12
+
13
+ h2 {
14
+ font-size: 120%;
15
+ font-weight: bold;
16
+ margin: 1em 0em 0em 0em;
17
+ }
18
+
19
+ h3 {
20
+ margin: 1em 0em 0em 0em;
21
+ }
22
+
23
+ p {
24
+ text-indent: 0em;
25
+ margin: 1em 0em 0em 0em;
26
+ }
27
+
Binary file
@@ -0,0 +1,43 @@
1
+ * {
2
+ margin: 0px;
3
+ padding: 0px;
4
+ text-indent: 0px;
5
+ }
6
+
7
+ h1 {
8
+ font-size: 150%;
9
+ font-weight: bold;
10
+ }
11
+
12
+ h2 {
13
+ font-size: 120%;
14
+ font-weight: bold;
15
+ margin: 1em 0em 0em 0em;
16
+ }
17
+
18
+ p {
19
+ text-indent: 0em;
20
+ margin: 1em 0em 0em 0em;
21
+ line-height: 150%;
22
+ }
23
+
24
+ table {
25
+ border-top: 1px solid #444;
26
+ border-left: 1px solid #444;
27
+ border-collapse: collapse;
28
+ border-spacing: 0;
29
+ background-color: #ffffff;
30
+ padding: 4px;
31
+ text-align: left;
32
+ }
33
+ th {
34
+ border-right:1px solid #444;
35
+ border-bottom:1px solid #444;
36
+ padding:0.3em 1em;
37
+ }
38
+
39
+ td {
40
+ border-right:1px solid #444;
41
+ border-bottom:1px solid #444;
42
+ padding:0.3em 1em;
43
+ }
Binary file
@@ -0,0 +1,27 @@
1
+ * {
2
+ margin: 0px;
3
+ padding: 0px;
4
+ text-indent: 0px;
5
+ line-height: 150%;
6
+ }
7
+
8
+ h1 {
9
+ font-size: 150%;
10
+ font-weight: bold;
11
+ }
12
+
13
+ h2 {
14
+ font-size: 120%;
15
+ font-weight: bold;
16
+ margin: 1em 0em 0em 0em;
17
+ }
18
+
19
+ h3 {
20
+ margin: 1em 0em 0em 0em;
21
+ }
22
+
23
+ p {
24
+ text-indent: 0em;
25
+ margin: 1em 0em 0em 0em;
26
+ }
27
+
Binary file
data/resource/wsj.css ADDED
@@ -0,0 +1,19 @@
1
+
2
+ * {
3
+ margin: 0px;
4
+ padding: 0px;
5
+ text-indent: 0px;
6
+ }
7
+
8
+ h1 {
9
+ font-size: 150%;
10
+ font-weight: bold;
11
+ }
12
+
13
+ p {
14
+ text-indent: 0em;
15
+ margin: 1em 0em 0em 0em;
16
+ line-height: 150%;
17
+ }
18
+
19
+
data/resource/wsj.jpg ADDED
Binary file
metadata ADDED
@@ -0,0 +1,245 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: news2kindle
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - TADA Tadashi
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-10-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: kindlegen
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: systemu
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: mail
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mechanize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nokogiri
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pit
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: dropbox_api
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: mongoid
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '6.1'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '6.1'
125
+ - !ruby/object:Gem::Dependency
126
+ name: bundler
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: rake
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rspec
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: pry
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ description: scrape some news site and deliver to kindle
182
+ email:
183
+ - t@tdtds.jp
184
+ executables:
185
+ - news2kindle
186
+ extensions: []
187
+ extra_rdoc_files: []
188
+ files:
189
+ - ".gitignore"
190
+ - ".rspec"
191
+ - ".tachikoma.yml"
192
+ - ".travis.yml"
193
+ - Gemfile
194
+ - Gemfile.lock
195
+ - README.md
196
+ - Rakefile
197
+ - bin/console
198
+ - bin/setup
199
+ - bin/test-generator
200
+ - exe/news2kindle
201
+ - lib/news2kindle.rb
202
+ - lib/news2kindle/dup_checker.rb
203
+ - lib/news2kindle/generator/internet-watch.rb
204
+ - lib/news2kindle/generator/nikkei-free.rb
205
+ - lib/news2kindle/generator/nikkei-paid.rb
206
+ - lib/news2kindle/generator/tdiary.rb
207
+ - lib/news2kindle/generator/wsj-paid.rb
208
+ - lib/news2kindle/generator/wsjus-paid.rb
209
+ - lib/news2kindle/task.rb
210
+ - lib/news2kindle/version.rb
211
+ - news2kindle.gemspec
212
+ - news2kindle.yaml.sample
213
+ - resource/internet-watch.css
214
+ - resource/internet-watch.jpg
215
+ - resource/nikkei.css
216
+ - resource/nikkei.jpg
217
+ - resource/tdiary.css
218
+ - resource/wsj-us.jpg
219
+ - resource/wsj.css
220
+ - resource/wsj.jpg
221
+ homepage: https://github.com/tdtds/news2kindle
222
+ licenses:
223
+ - GPL
224
+ metadata: {}
225
+ post_install_message:
226
+ rdoc_options: []
227
+ require_paths:
228
+ - lib
229
+ required_ruby_version: !ruby/object:Gem::Requirement
230
+ requirements:
231
+ - - ">="
232
+ - !ruby/object:Gem::Version
233
+ version: '0'
234
+ required_rubygems_version: !ruby/object:Gem::Requirement
235
+ requirements:
236
+ - - ">="
237
+ - !ruby/object:Gem::Version
238
+ version: '0'
239
+ requirements: []
240
+ rubyforge_project:
241
+ rubygems_version: 2.5.2.1
242
+ signing_key:
243
+ specification_version: 4
244
+ summary: scrape some news site and deliver to kindle
245
+ test_files: []