kindle_manager 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cb02e5a3a67d8232fbff330bdc508c7b2a841e09
4
+ data.tar.gz: 2641e09f54bb46cf8ca1dfe3b70950ce213ab43c
5
+ SHA512:
6
+ metadata.gz: c695ef3285f6d530a8ad13ba16bf75105f782736bb0eccd05cdf97171789474caa51c385da23a16a6bce9d21f5764975d8f7bb5ef682ca7d9f2f12cdf9964b10
7
+ data.tar.gz: f80af23783b9af78e220cfb16aa42a6340d86ed9c667db43831aa1ac41de1c5869d73c46f23df80b38debc02c1b7d232952f558fd04932c2bc60facd348017a0
data/.gitignore ADDED
@@ -0,0 +1,16 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ /.env
11
+ /.byebug_history
12
+ /downloads/
13
+ /spec/fixtures/files/
14
+
15
+ # rspec failure tracking
16
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.2.4
5
+ before_install: gem install bundler -v 1.14.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in kindle_manager.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 Kazuho Yamaguchi
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,65 @@
1
+ # KindleManager
2
+
3
+ Scrape information about your Kindle books from the Amazon site.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'kindle_manager'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install kindle_manager
20
+
21
+ ## Usage
22
+
23
+ ### Setup
24
+
25
+ Create a _.env_ file by following the instructions at https://github.com/kyamaguchi/amazon_auth
26
+
27
+ You may also need to call `Dotenv.load` or add `gem 'dotenv-rails'` when you use this gem inside your own app.
28
+
29
+ ### Run
30
+
31
+ ```
32
+ bin/console
33
+ ```
34
+
35
+ In the console:
36
+
37
+ ```
38
+ client = KindleManager::Client.new
39
+ client.fetch_kindle_list
40
+ books = client.load_kindle_books
41
+
42
+ client.quit
43
+ ```
44
+
45
+ #### Options
46
+
47
+ Print debug output and save a snapshot of each page: `client = KindleManager::Client.new(debug: true)`
48
+
49
+ Stop fetching once more than the given number of books has been loaded: `client = KindleManager::Client.new(limit: 100)`
50
+
51
+ ## Development
52
+
53
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
54
+
55
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
56
+
57
+ ## Contributing
58
+
59
+ Bug reports and pull requests are welcome on GitHub at https://github.com/kyamaguchi/kindle_manager.
60
+
61
+
62
+ ## License
63
+
64
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
65
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "kindle_manager"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification for kindle_manager.
# Puts the gem's own lib/ directory on the load path so the version constant
# can be read before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'kindle_manager/version'

Gem::Specification.new do |gem|
  gem.name          = "kindle_manager"
  gem.version       = KindleManager::VERSION
  gem.authors       = ["Kazuho Yamaguchi"]
  gem.email         = ["kzh.yap@gmail.com"]

  gem.summary       = "Scrape information of kindle books from amazon site"
  gem.description   = "Scrape information of kindle books from amazon site"
  gem.homepage      = "https://github.com/kyamaguchi/kindle_manager"
  gem.license       = "MIT"

  # Package every file tracked by git except tests, specs and features.
  tracked_files     = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir        = "exe"
  # Executables are the basenames of everything shipped under exe/.
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_runtime_dependency "amazon_auth", "~> 0.1.2"
  gem.add_development_dependency "bundler", "~> 1.14"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.0"
  gem.add_development_dependency "byebug"
end
@@ -0,0 +1,9 @@
1
# Entry point of the gem: loads dependencies and every KindleManager component.

# byebug is declared only as a development dependency in the gemspec, so it
# may not be installed alongside the released gem. Load it when available and
# carry on without the debugger otherwise, instead of failing with LoadError.
begin
  require 'byebug'
rescue LoadError
  # Debugger not installed; breakpoints are simply unavailable.
end
# Date is used by KindleManager::ListParser::BookRow#date.
require 'date'
require 'amazon_auth'
require "kindle_manager/version"
require "kindle_manager/client"
require "kindle_manager/file_store"
require "kindle_manager/list_parser"

# Namespace for the client, file store and list parser.
module KindleManager
end
@@ -0,0 +1,116 @@
1
module KindleManager
  # Signs in through AmazonAuth, opens the content-management page, keeps
  # loading the Kindle book list (scrolling / clicking the "show more" button)
  # and, in debug mode, records page snapshots through KindleManager::FileStore.
  # Stored HTML snapshots can then be parsed with #load_kindle_books.
  class Client
    # Highest value of the loop counter in #load_next_kindle_list. The counter
    # is reset whenever the "show more" button is found, so this bounds the
    # number of consecutive scroll-only iterations.
    MAX_IDLE_LOOPS = 12

    # The session object returned by AmazonAuth::Client#sign_in
    # (NOTE(review): presumably a Capybara session - confirm against amazon_auth).
    attr_accessor :page

    # @param options [Hash]
    #   :debug [Boolean] print progress and record page snapshots (default false)
    #   :limit [Integer, nil] stop loading once more than this many books are shown
    # @raise re-raises whatever AmazonAuth::Client.new raises, after printing a hint
    def initialize(options = {})
      @debug = options.fetch(:debug, false)
      @limit = options.fetch(:limit, nil)
      begin
        @client = AmazonAuth::Client.new
      rescue
        puts "Please set up the credentials for the amazon_auth gem by following its instructions."
        puts
        raise
      end
    end

    # File store used for reading/recording pages. Until a session-bound store
    # is set up it points at the latest download directory and has no session.
    def store
      @store ||= KindleManager::FileStore.new(nil, latest: true)
    end

    # Replaces the store with one bound to the current page (new directory).
    def setup_file_store_with_session
      @store = KindleManager::FileStore.new(page)
    end

    # Signs in, navigates to the management page and loads the book list.
    # Errors raised while loading the list are printed, not propagated
    # (best effort, as in the original implementation).
    def fetch_kindle_list
      sign_in
      setup_file_store_with_session
      go_to_kindle_management_page
      begin
        load_next_kindle_list
      rescue => e
        # A debugger breakpoint used to live here; it halted library users'
        # processes and required a development-only gem, so it was removed.
        # retry ?
        puts e
      end
    end

    # Parses every stored HTML file of the current store.
    # @return [Array<KindleManager::ListParser::BookRow>] rows, unique by asin
    def load_kindle_books
      rows = store.list_html_files.flat_map do |file|
        KindleManager::ListParser.new(file).book_list
      end
      rows.uniq(&:asin)
    end

    # Signs in through AmazonAuth and remembers the returned page.
    def sign_in
      @page = @client.sign_in
    end

    # Clicks the 'コンテンツと端末の管理' link inside '#shopAllLinks'.
    # @return the page
    def go_to_kindle_management_page
      wait_for_selector('#shopAllLinks')
      page.within('#shopAllLinks') do
        page.find('a', text: 'コンテンツと端末の管理').click
      end
      page
    end

    # Repeatedly scrolls down or clicks the 'もっと表示' button until the limit is
    # exceeded or the loop counter passes MAX_IDLE_LOOPS.
    # @raise [RuntimeError] when the button is still present right after clicking it
    def load_next_kindle_list
      wait_for_selector('.contentCount_myx')
      @current_loop = 0
      while @current_loop <= MAX_IDLE_LOOPS
        break if limit_reached?

        if has_more_button?
          debug_print_page
          @current_loop = 0 # progress was made; restart the idle counter

          puts "Clicking もっと表示"
          page.execute_script "window.scrollBy(0,-800)"
          page.click_on('もっと表示')
          sleep 1
          raise('Clicking of more button may have failed') if has_more_button?
        else
          puts "Scrolling #{@current_loop}"
          page.execute_script "window.scrollBy(0,10000)"
        end
        sleep 5
        @current_loop += 1
      end
      debug_print_page
    end

    # Quits the browser driver; does nothing when no page was ever opened.
    def quit
      page.driver.quit if page
    end

    # Polls once per second, up to +seconds+ times, until +selector+ is found.
    def wait_for_selector(selector, seconds = 3)
      seconds.times do
        break if page.first(selector)
        sleep(1)
      end
    end

    # @return [Boolean] whether an element '#contentTable_showMore_myx' with
    #   the exact text 'もっと表示' exists
    def has_more_button?
      page.all('#contentTable_showMore_myx').map(&:text).include?('もっと表示')
    end

    # Reads the number in front of 'を表示中' from the '.contentCount_myx' element.
    # @return [Integer, nil] nil when the element is missing or has no such number
    def number_of_fetched_books
      counter = page.first('.contentCount_myx')
      return nil unless counter

      m = counter.text.match(/(\d+)を表示中/)
      m && m[1].to_i
    end

    # @return [Boolean] whether a '.myx-popover-loading-wrapper' element was found
    def loading?
      # Plain Ruby truthiness instead of ActiveSupport's Object#present?.
      !page.first('.myx-popover-loading-wrapper').nil?
    end

    # In debug mode: records the page via the store and prints timestamp,
    # loop counter and the counter text (when present).
    def debug_print_page
      return unless @debug

      store.record_page
      puts current_time.strftime("%Y-%m-%d %H:%M:%S")
      puts "Loop: #{@current_loop}"
      counter = page.first('.contentCount_myx')
      puts counter.text if counter
      puts
    end

    private

    # True when a limit is set and the page reports more fetched books than it.
    # An unknown count (nil) never counts as reaching the limit.
    def limit_reached?
      return false unless @limit

      fetched = number_of_fetched_books
      !fetched.nil? && @limit < fetched
    end

    # Uses ActiveSupport's Time.current when it is loaded, Time.now otherwise,
    # so the class does not require ActiveSupport.
    def current_time
      Time.respond_to?(:current) ? Time.current : Time.now
    end
  end
end
@@ -0,0 +1,60 @@
1
module KindleManager
  # Maps a session's page snapshots to files under
  # "<downloads_dir>/<dir_name>/<timestamp>.<ext>" and lists stored HTML files.
  class FileStore
    # strftime pattern used for directory names (second resolution).
    DIR_NAME_FORMAT = '%Y%m%d%H%M%S'.freeze
    # strftime pattern used for file names: same as above plus 3-digit milliseconds.
    FILE_NAME_FORMAT = '%Y%m%d%H%M%S%L'.freeze

    attr_accessor :dir_name

    # @param session object responding to save_page / save_screenshot; may be
    #   nil when the store is only used for reading
    # @param options [Hash]
    #   :dir_name [String] explicit directory name
    #   :latest [Boolean] reuse the latest existing download directory; when
    #     none exists a fresh timestamp-based name is used instead
    def initialize(session, options = {})
      @dir_name = options.fetch(:dir_name) do
        (options[:latest] && find_latest_dir_name) || current_time.strftime(DIR_NAME_FORMAT)
      end
      @session = session
    end

    # @return [String] directory of this store below the downloads directory
    def base_dir
      File.join(self.class.downloads_dir, @dir_name)
    end

    # @return [String] root directory (relative) for all downloads
    def self.downloads_dir
      'downloads'
    end

    # @param time [Time]
    # @return [String] path of the HTML snapshot for +time+
    def html_path(time)
      build_filepath(time, 'html')
    end

    # @param time [Time]
    # @return [String] path of the PNG screenshot for +time+
    def image_path(time)
      build_filepath(time, 'png')
    end

    # Saves the session's current page as HTML and as a screenshot, both
    # named after the same timestamp.
    def record_page
      time = current_time
      @session.save_page(html_path(time))
      @session.save_screenshot(image_path(time))
    end

    # @return [Array<String>] sorted paths of the directories in downloads_dir
    def self.list_download_dirs
      Dir["#{downloads_dir}/*"].select { |path| File.directory?(path) }.sort
    end

    # @param dir [String, nil] directory name below downloads_dir; nil for all
    # @return [Array<String>] sorted paths of the stored HTML files
    def self.list_html_files(dir = nil)
      pattern = dir ? File.join(downloads_dir, dir, '*.html') : "#{downloads_dir}/*/*.html"
      # Sorted explicitly: the order of Dir[] results is not guaranteed on all Rubies.
      Dir[pattern].select { |path| File.file?(path) }.sort
    end

    # @return [Array<String>] HTML files stored in this store's directory
    def list_html_files
      self.class.list_html_files(@dir_name)
    end

    # @return [String, nil] name of the download directory that sorts last,
    #   or nil when there is no download directory yet
    def find_latest_dir_name
      latest = self.class.list_download_dirs.max
      latest && File.basename(latest)
    end

    private

    # Builds "<base_dir>/<YYYYmmddHHMMSS><mmm>.<ext>". Milliseconds are
    # truncated (%L), so the suffix always has exactly three digits; rounding
    # could yield "1000" for the last half millisecond of a second.
    def build_filepath(time, ext)
      File.join(base_dir, "#{time.strftime(FILE_NAME_FORMAT)}.#{ext}")
    end

    # Uses ActiveSupport's Time.current when it is loaded, Time.now otherwise,
    # so the class does not require ActiveSupport.
    def current_time
      Time.respond_to?(:current) ? Time.current : Time.now
    end
  end
end
@@ -0,0 +1,64 @@
1
require 'date'

module KindleManager
  # Parses a stored HTML snapshot of the Kindle content list into book rows.
  class ListParser
    # Wraps one "contentTabList_*" node and exposes its fields. Every reader
    # is memoized; readers for optional parts of the row return nil (or 0 for
    # the collection count) instead of raising when the element is missing.
    class BookRow
      # Field names exported by #to_hash, in order.
      FIELDS = %w[asin title tag author date collection_count].freeze
      # Matches dates written as e.g. "2017年3月15日".
      DATE_PATTERN = /\A(?<year>\d{4})年(?<month>\d{1,2})月(?<day>\d{1,2})日\z/

      # @param node a parsed HTML element responding to [] and css
      def initialize(node)
        @node = node
      end

      def inspect
        "#<#{self.class.name}:#{object_id} #{to_hash}>"
      end

      # @return [String] the node's name attribute without the "contentTabList_" prefix
      def asin
        @_asin ||= @node['name'].gsub(/\AcontentTabList_/, '')
      end

      # @return [String] text of the title element ('' when absent)
      def title
        @_title ||= @node.css("div[id^='title']").text
      end

      # @return [String, nil] stripped text of the bold title tag, nil when the row has none
      def tag
        @_tag ||= begin
          label = @node.css("div[id^='listViewTitleTag']").css('.myx-text-bold').first
          label && label.text.strip
        end
      end

      # @return [String] text of the author element ('' when absent)
      def author
        @_author ||= @node.css("div[id^='author']").text
      end

      # @return [Date, nil] date parsed from the date element; nil when the
      #   text does not have the expected "YYYY年M月D日" form
      def date
        @_date ||= begin
          m = @node.css("div[id^='date']").text.strip.match(DATE_PATTERN)
          m && Date.new(m[:year].to_i, m[:month].to_i, m[:day].to_i)
        end
      end

      # @return [Integer] number shown in the collection counter, 0 when absent
      def collection_count
        @_collection_count ||= begin
          counter = @node.css(".collectionsCount .myx-collection-count").first
          counter ? counter.text.strip.to_i : 0
        end
      end

      # @return [Hash{String => Object}] all FIELDS keyed by name
      def to_hash
        FIELDS.each_with_object({}) do |field, hash|
          hash[field] = send(field)
        end
      end
    end

    # @param filepath [String] path of the HTML file to parse
    # @param options [Hash] currently unused; kept for interface compatibility
    def initialize(filepath, options = {})
      @filepath = filepath
    end

    # @return [Array<BookRow>] one row per element whose id starts with "contentTabList_"
    def book_list
      @book_list ||= doc.css("div[id^='contentTabList_']").map { |element| BookRow.new(element) }
    end

    # @return the file body parsed with Nokogiri::HTML (Nokogiri must already be loaded)
    def doc
      @doc ||= Nokogiri::HTML(body)
    end

    # @return [String] raw content of the file
    def body
      @body ||= File.read(@filepath)
    end
  end
end
@@ -0,0 +1,3 @@
1
module KindleManager
  # Version of the gem; read by the gemspec. Frozen so it cannot be mutated
  # in place by accident.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kindle_manager
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kazuho Yamaguchi
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-03-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: amazon_auth
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.1.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.1.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.14'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.14'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: byebug
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Scrape information of kindle books from amazon site
84
+ email:
85
+ - kzh.yap@gmail.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".rspec"
92
+ - ".travis.yml"
93
+ - Gemfile
94
+ - LICENSE.txt
95
+ - README.md
96
+ - Rakefile
97
+ - bin/console
98
+ - bin/setup
99
+ - kindle_manager.gemspec
100
+ - lib/kindle_manager.rb
101
+ - lib/kindle_manager/client.rb
102
+ - lib/kindle_manager/file_store.rb
103
+ - lib/kindle_manager/list_parser.rb
104
+ - lib/kindle_manager/version.rb
105
+ homepage: https://github.com/kyamaguchi/kindle_manager
106
+ licenses:
107
+ - MIT
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.4.8
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: Scrape information of kindle books from amazon site
129
+ test_files: []