news2kindle 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # scraping online.wsj.com (U.S. edition) for Kindlizer
4
+ #
5
+
6
+ require File.expand_path('../wsj-paid', __FILE__ )
7
+
8
module Kindlizer
  module Generator
    # Generator for the U.S. edition of online.wsj.com.
    #
    # Logs in with the subscriber credentials, builds a table of contents from
    # the "TOP NEWS" list and every category in the main navigation, and hands
    # that table to +generate_contents+.
    #
    # NOTE(review): +canonical+, +retry_loop+, +generate_contents+ and the
    # instance variables @dst_dir, @wsj_id and @wsj_pw are not defined in this
    # class; presumably they are provided by WsjPaid -- confirm.
    class WsjusPaid < WsjPaid
      TOP = 'http://online.wsj.com'
      LOGIN = "https://id.wsj.com/access/pages/wsj/us/login_standalone.html"

      # Matches article links, either absolute (http://online.wsj.com/article/...)
      # or site-relative (/article/...).
      ARTICLE_HREF = %r{\A(?:http://online\.wsj\.com)?/article/}

      # Navigation entries that are skipped when scraping categories.
      SKIPPED_CATEGORIES = ['Home', 'Market Data', 'C-Suite'].freeze

      # Maximum number of articles collected for a single category.
      MAX_ARTICLES_PER_CATEGORY = 8

      # Scrapes the site and yields the path of the generated OPF file.
      #
      # @param opts [Hash] options; +opts[:now]+ must respond to +strftime+
      # @yield [String] path "<@dst_dir>/wsj-paid.opf"
      # @return [Object] whatever the given block returns
      # @raise re-raises any error from fetching a category page after it has
      #   been logged
      def generate(opts)
        @now = opts[:now]
        # Use the instance variable assigned above; a bare `now` is not a
        # local variable in this method.
        @now_str = @now.strftime '%Y-%m-%d %H:%M'
        @title = "WSJ U.S."
        @lang = "en-US"
        # The source path is relative to the current working directory.
        FileUtils.cp( "./resource/wsj-us.jpg", @dst_dir + "/wsj.jpg")

        agent = Mechanize::new
        agent.set_proxy( *ENV['HTTP_PROXY'].split( /:/ ) ) if ENV['HTTP_PROXY']

        toc = []

        # Log in: fill the first form of the login page and post it to the
        # JSON login endpoint.
        agent.get(LOGIN)
        form = agent.page.forms.first
        form.action = 'https://id.wsj.com/auth/submitlogin.json'
        form['username'] = @wsj_id
        form['password'] = @wsj_pw
        form.submit

        # The JSON response carries a "url" entry that is visited next.
        response = JSON.parse(agent.page.body)
        agent.get( response["url"] )

        agent.get( TOP + "/home-page?_wsjregion=na,us&_homepage=/home/us")

        #
        # scraping top news
        #
        toc_top = ['TOP NEWS']
        (agent.page / "div.whatsNews ul.newsItem h2 a").each do |link|
          href = link.attr( 'href' )
          toc_top << [canonical( link.text.strip ), href] if ARTICLE_HREF =~ href
        end
        toc << toc_top

        #
        # scraping all categories
        #
        # The navigation node set is taken from the home page before the
        # agent moves on to the individual category pages.
        (agent.page.root / 'div.wsjMainNav li').each do |li|
          nav = (li / 'a').first
          next unless nav # navigation item without a link

          title = nav.text.strip
          next if SKIPPED_CATEGORIES.include?( title )

          category_uri = nav.attr( 'href' )
          toc_cat = [canonical( title )]
          begin
            retry_loop( 5 ) do
              agent.get( category_uri )
              sleep 1
            end
          rescue
            # Log the address that failed, then let the original error propagate.
            News2Kindle.logger.error "cannot get #{category_uri}."
            raise
          end

          # Category pages use one of two layouts; fall back to the second
          # selector when the first finds nothing.
          links = (agent.page / "div.whatsNews ul.newsItem h2 a")
          links = (agent.page / "div.headlineSummary ul.newsItem h2 a") if links.size == 0

          count = 0
          links.each do |link|
            href = link.attr( 'href' )
            next unless ARTICLE_HREF =~ href

            toc_cat << [canonical( link.text.strip ), href]
            count += 1
            break if count >= MAX_ARTICLES_PER_CATEGORY
          end
          toc << toc_cat
        end

        generate_contents( toc, agent )
        yield "#{@dst_dir}/wsj-paid.opf"
      end
    end
  end
end
@@ -0,0 +1,116 @@
1
+ # task controller
2
+ #
3
+ # Copyright (C) 2017 by TADA Tadashi <t@tdtds.jp>
4
+ # Distributed under GPL.
5
+ #
6
+ require 'pit'
7
+ require 'kindlegen'
8
+ require 'mail'
9
+ require 'dropbox_api'
10
+
11
+ class DropboxApi::Client
12
+ def chunk_upload(dropbox_file)
13
+ info = DropboxApi::Metadata::CommitInfo.new('path'=>dropbox_file, 'mode'=>:add)
14
+ cursor = upload_session_start('')
15
+ while data = yield
16
+ upload_session_append_v2(cursor, data)
17
+ end
18
+ upload_session_finish(cursor, info)
19
+ end
20
+ end
21
+
22
+ module News2Kindle
23
+ class Task
24
+ def initialize(name)
25
+ @name = name
26
+ require "news2kindle/generator/#{@name}"
27
+ @generator = News2Kindle::Generator.const_get(@name.split(/-/).map{|a|a.capitalize}.join)
28
+ end
29
+
30
+ def run(to, from, opts)
31
+ Dir.mktmpdir do |dir|
32
+ @generator::new(dir).generate(opts) do |opf|
33
+ file = "#{@name}.#{Time.now.strftime("%Y%m%d%H%M%S%2N")}.mobi"
34
+ Kindlegen.run(opf, '-o', file, '-locale', 'ja')
35
+ mobi = Pathname(opf).dirname + file
36
+ if mobi.file?
37
+ News2Kindle.logger.info "generated #{mobi} successfully."
38
+ deliver([to].flatten, from, mobi, opts)
39
+ else
40
+ News2Kindle.logger.error 'failed mobi generation.'
41
+ end
42
+ end
43
+ end
44
+ end
45
+
46
+ private
47
+ def deliver(to_address, from_address, mobi, opts)
48
+ to_dropbox = to_address.map{|a| /^dropbox:/ =~ a ? a : nil}.compact
49
+ deliver_via_dropbox(to_dropbox, mobi)
50
+ deliver_via_mail(to_address - to_dropbox, from_address, mobi, opts)
51
+ end
52
+
53
+ def deliver_via_mail(to_address, from_address, mobi, opts)
54
+ return if to_address.empty?
55
+
56
+ settings = opts[:email]
57
+ if settings[:user_name] or settings[:password]
58
+ account = Pit::get('news2kindle', require: {
59
+ mail_user_name: 'your e-mail id',
60
+ mail_password: 'your e-mail password'
61
+ })
62
+ settings[:user_name] = account[:mail_user_name]
63
+ settings[:password] = account[:mail_password]
64
+ end
65
+ Mail.defaults{delivery_method :smtp, settings}
66
+ Mail.deliver do
67
+ from from_address
68
+ to to_address
69
+ subject 'sent by news2kindle'
70
+ body 'dummy text'
71
+ attachments[mobi.basename.to_s] = {
72
+ :mime_type => 'application/octet-stream',
73
+ :content => open(mobi, &:read)
74
+ }
75
+ end
76
+ News2Kindle.logger.info "sent mails successfully."
77
+ end
78
+
79
+ def deliver_via_dropbox(to_address, mobi)
80
+ return if to_address.empty?
81
+
82
+ begin
83
+ auth = Pit::get('news2kindle')
84
+ unless auth[:dropbox_token]
85
+ print "Enter dropbox app key: "
86
+ api_key = $stdin.gets.chomp
87
+
88
+ print "Enter dropbox app secret: "
89
+ api_secret = $stdin.gets.chomp
90
+
91
+ authenticator = DropboxApi::Authenticator.new(api_key, api_secret)
92
+ puts "\nGo to this url and click 'Authorize' to get the token:"
93
+ puts authenticator.authorize_url
94
+
95
+ print "Enter the token: "
96
+ code = $stdin.gets.chomp
97
+
98
+ auth[:dropbox_token] = authenticator.get_token(code).token
99
+ Pit::set('news2kindle', data: auth)
100
+ end
101
+ client = DropboxApi::Client.new(auth[:dropbox_token])
102
+ to_address.each do |address|
103
+ file = Pathname(address.sub(/^dropbox:/, '')) + mobi.basename
104
+ open(mobi) do |f|
105
+ client.chunk_upload(file){f.read(10_000_000)}
106
+ end
107
+ News2Kindle.logger.info "saved to #{file} successfully."
108
+ end
109
+ rescue
110
+ News2Kindle.logger.error "failed while saving to dropbox."
111
+ News2Kindle.logger.debug $!
112
+ $@.each{|l| News2Kindle.logger.debug l}
113
+ end
114
+ end
115
+ end
116
+ end
@@ -0,0 +1,3 @@
1
module News2Kindle
  # Version string of the news2kindle gem.
  VERSION = '0.1.1'
end
@@ -0,0 +1,37 @@
1
+
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "news2kindle/version"
5
+
6
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name    = 'news2kindle'
  spec.version = News2Kindle::VERSION
  spec.authors = ['TADA Tadashi']
  spec.email   = ['t@tdtds.jp']

  # Description shown by the registry.
  spec.summary     = 'scrape some news site and deliver to kindle'
  spec.description = 'scrape some news site and deliver to kindle'
  spec.homepage    = 'https://github.com/tdtds/news2kindle'
  spec.license     = 'GPL'

  # Package every file tracked by git except those under test/, spec/ or
  # features/.
  tracked = `git ls-files -z`.split("\x0")
  spec.files = tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are the files under exe/, listed by base name.
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # Runtime dependencies without a version constraint.
  %w[kindlegen systemu mail mechanize nokogiri pit dropbox_api].each do |gem_name|
    spec.add_dependency gem_name
  end
  spec.add_dependency 'mongoid', '~> 6.1'

  # Development-only dependencies.
  %w[bundler rake rspec pry].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
@@ -0,0 +1,31 @@
1
+ ---
2
+ :sender: hoge@example.com
3
+ :email:
4
+ :address: smtp.sendgrid.net
5
+ :port: 587
6
+ :user_name: yes
7
+ :password: yes
8
+ :authentication: :plain
9
+ :mongodb_uri: mongodb://localhost:27017/news2kindle
10
+ :tasks:
11
+ newspaper:
12
+ :media:
13
+ - nikkei-free
14
+ - wsj-paid
15
+ :receiver:
16
+ - reader1@kindle.com
17
+ - reader2@kindle.com
18
+ - dropbox:/news
19
+ watch:
20
+ :media:
21
+ - internet-watch
22
+ :receiver:
23
+ - reader1@kindle.com
24
+ - dropbox:/watch
25
+ diary:
26
+ :media:
27
+ - tdiary
28
+ :option:
29
+ :tdiary_top: http://sho.tdiary.net/
30
+ :receiver:
31
+ - reader2@kindle.com
@@ -0,0 +1,27 @@
1
+ * {
2
+ margin: 0px;
3
+ padding: 0px;
4
+ text-indent: 0px;
5
+ line-height: 150%;
6
+ }
7
+
8
+ h1 {
9
+ font-size: 150%;
10
+ font-weight: bold;
11
+ }
12
+
13
+ h2 {
14
+ font-size: 120%;
15
+ font-weight: bold;
16
+ margin: 1em 0em 0em 0em;
17
+ }
18
+
19
+ h3 {
20
+ margin: 1em 0em 0em 0em;
21
+ }
22
+
23
+ p {
24
+ text-indent: 0em;
25
+ margin: 1em 0em 0em 0em;
26
+ }
27
+
Binary file
@@ -0,0 +1,43 @@
1
+ * {
2
+ margin: 0px;
3
+ padding: 0px;
4
+ text-indent: 0px;
5
+ }
6
+
7
+ h1 {
8
+ font-size: 150%;
9
+ font-weight: bold;
10
+ }
11
+
12
+ h2 {
13
+ font-size: 120%;
14
+ font-weight: bold;
15
+ margin: 1em 0em 0em 0em;
16
+ }
17
+
18
+ p {
19
+ text-indent: 0em;
20
+ margin: 1em 0em 0em 0em;
21
+ line-height: 150%;
22
+ }
23
+
24
+ table {
25
+ border-top: 1px solid #444;
26
+ border-left: 1px solid #444;
27
+ border-collapse: collapse;
28
+ border-spacing: 0;
29
+ background-color: #ffffff;
30
+ padding: 4px;
31
+ text-align: left;
32
+ }
33
+ th {
34
+ border-right:1px solid #444;
35
+ border-bottom:1px solid #444;
36
+ padding:0.3em 1em;
37
+ }
38
+
39
+ td {
40
+ border-right:1px solid #444;
41
+ border-bottom:1px solid #444;
42
+ padding:0.3em 1em;
43
+ }
Binary file
@@ -0,0 +1,27 @@
1
+ * {
2
+ margin: 0px;
3
+ padding: 0px;
4
+ text-indent: 0px;
5
+ line-height: 150%;
6
+ }
7
+
8
+ h1 {
9
+ font-size: 150%;
10
+ font-weight: bold;
11
+ }
12
+
13
+ h2 {
14
+ font-size: 120%;
15
+ font-weight: bold;
16
+ margin: 1em 0em 0em 0em;
17
+ }
18
+
19
+ h3 {
20
+ margin: 1em 0em 0em 0em;
21
+ }
22
+
23
+ p {
24
+ text-indent: 0em;
25
+ margin: 1em 0em 0em 0em;
26
+ }
27
+
Binary file
data/resource/wsj.css ADDED
@@ -0,0 +1,19 @@
1
+
2
+ * {
3
+ margin: 0px;
4
+ padding: 0px;
5
+ text-indent: 0px;
6
+ }
7
+
8
+ h1 {
9
+ font-size: 150%;
10
+ font-weight: bold;
11
+ }
12
+
13
+ p {
14
+ text-indent: 0em;
15
+ margin: 1em 0em 0em 0em;
16
+ line-height: 150%;
17
+ }
18
+
19
+
data/resource/wsj.jpg ADDED
Binary file
metadata ADDED
@@ -0,0 +1,245 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: news2kindle
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - TADA Tadashi
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-10-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: kindlegen
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: systemu
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: mail
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mechanize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: nokogiri
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pit
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: dropbox_api
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: mongoid
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '6.1'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '6.1'
125
+ - !ruby/object:Gem::Dependency
126
+ name: bundler
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: rake
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rspec
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: pry
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ description: scrape some news site and deliver to kindle
182
+ email:
183
+ - t@tdtds.jp
184
+ executables:
185
+ - news2kindle
186
+ extensions: []
187
+ extra_rdoc_files: []
188
+ files:
189
+ - ".gitignore"
190
+ - ".rspec"
191
+ - ".tachikoma.yml"
192
+ - ".travis.yml"
193
+ - Gemfile
194
+ - Gemfile.lock
195
+ - README.md
196
+ - Rakefile
197
+ - bin/console
198
+ - bin/setup
199
+ - bin/test-generator
200
+ - exe/news2kindle
201
+ - lib/news2kindle.rb
202
+ - lib/news2kindle/dup_checker.rb
203
+ - lib/news2kindle/generator/internet-watch.rb
204
+ - lib/news2kindle/generator/nikkei-free.rb
205
+ - lib/news2kindle/generator/nikkei-paid.rb
206
+ - lib/news2kindle/generator/tdiary.rb
207
+ - lib/news2kindle/generator/wsj-paid.rb
208
+ - lib/news2kindle/generator/wsjus-paid.rb
209
+ - lib/news2kindle/task.rb
210
+ - lib/news2kindle/version.rb
211
+ - news2kindle.gemspec
212
+ - news2kindle.yaml.sample
213
+ - resource/internet-watch.css
214
+ - resource/internet-watch.jpg
215
+ - resource/nikkei.css
216
+ - resource/nikkei.jpg
217
+ - resource/tdiary.css
218
+ - resource/wsj-us.jpg
219
+ - resource/wsj.css
220
+ - resource/wsj.jpg
221
+ homepage: https://github.com/tdtds/news2kindle
222
+ licenses:
223
+ - GPL
224
+ metadata: {}
225
+ post_install_message:
226
+ rdoc_options: []
227
+ require_paths:
228
+ - lib
229
+ required_ruby_version: !ruby/object:Gem::Requirement
230
+ requirements:
231
+ - - ">="
232
+ - !ruby/object:Gem::Version
233
+ version: '0'
234
+ required_rubygems_version: !ruby/object:Gem::Requirement
235
+ requirements:
236
+ - - ">="
237
+ - !ruby/object:Gem::Version
238
+ version: '0'
239
+ requirements: []
240
+ rubyforge_project:
241
+ rubygems_version: 2.5.2.1
242
+ signing_key:
243
+ specification_version: 4
244
+ summary: scrape some news site and deliver to kindle
245
+ test_files: []