klipbook 3.0.0 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +1 -1
- data/CHANGELOG.txt +22 -13
- data/Gemfile.lock +46 -71
- data/README.md +7 -24
- data/Rakefile +0 -4
- data/klipbook.gemspec +0 -1
- data/lib/klipbook.rb +0 -2
- data/lib/klipbook/cli.rb +3 -8
- data/lib/klipbook/commands/command.rb +2 -2
- data/lib/klipbook/sources/source.rb +0 -13
- data/lib/klipbook/version.rb +1 -1
- metadata +3 -20
- data/.ruby-version +0 -1
- data/lib/klipbook/sources/amazon_site/book_scraper.rb +0 -65
- data/lib/klipbook/sources/amazon_site/site_scraper.rb +0 -80
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 404f5bf771069c94bf0c7b2081168d52a83b50d90519f0bcc03743e56cd88639
|
4
|
+
data.tar.gz: 8e5f90442fd8c81add6818016f3b1c7aad537920b1bee88407e8190b20284a93
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b7fc57d83fcb14116102c2def8d357afd090dd321af905e78022257dd802fb39f8708dd9ce2f11c81dba291d98b268914272109959ca0e47fdc8e66ef7156359
|
7
|
+
data.tar.gz: 48199b739319ef3b85318257dfa784d0f66ef728f53323b7be00915258e42a654cb6c1da68c9384fed196df57df13c7e51c89fe46811b4a931edf4c64c719fc7
|
data/.travis.yml
CHANGED
data/CHANGELOG.txt
CHANGED
@@ -1,45 +1,54 @@
|
|
1
|
+
== 4.0.0 / 2018-11-07
|
2
|
+
|
3
|
+
* Breaking changes
|
4
|
+
|
5
|
+
* Remove site scraping feature.
|
6
|
+
|
7
|
+
* Minor changes
|
8
|
+
|
9
|
+
* Tested on Ruby 2.5.2
|
10
|
+
|
1
11
|
== 3.0.0 / 2016-09-18
|
2
12
|
|
3
13
|
* Feature Changes
|
4
14
|
|
5
|
-
|
15
|
+
* Added markdown as an export format.
|
6
16
|
|
7
17
|
* Breaking changes
|
8
18
|
|
9
|
-
|
10
|
-
|
19
|
+
* Changed command flags to make things more consistent.
|
20
|
+
* Changed JSON export to export into individual files rather than a single file. Again for consistency with other export formats.
|
11
21
|
|
12
22
|
== 2.1.3 / 2014-07-25
|
13
23
|
|
14
24
|
* Minor changes
|
15
25
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
26
|
+
* Removed Rainbow, RR, pry-debugger gems
|
27
|
+
* Updated dependent gems
|
28
|
+
* Updated tests to use RSpec 3
|
29
|
+
* Tested on Ruby 2.1.2
|
20
30
|
|
21
31
|
== 2.1.0 / 2013-12-11
|
22
32
|
|
23
33
|
* Feature changes
|
24
34
|
|
25
|
-
|
35
|
+
* Reworked parameters for better clarity. tojson and tohtml are now supported.
|
26
36
|
|
27
37
|
== 2.0.0 / 2013-02-12
|
28
38
|
|
29
39
|
* Feature changes
|
30
40
|
|
31
|
-
|
41
|
+
* Renamed the collate command to pretty print which describes what it does better.
|
32
42
|
|
33
|
-
|
43
|
+
* Introduces a _new_ collate command that collates all clippings into a single JSON
|
34
44
|
file.
|
35
45
|
|
36
46
|
* Other unexposed changes
|
37
47
|
|
38
|
-
|
48
|
+
* Internally refactored much of the codebase into a more sensible structure.
|
39
49
|
|
40
50
|
== 1.0.2 / 2012-12-12
|
41
51
|
|
42
52
|
* Bug fixes
|
43
53
|
|
44
|
-
|
45
|
-
|
54
|
+
* Updated site scraping code to ensure it still works
|
data/Gemfile.lock
CHANGED
@@ -1,97 +1,72 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
klipbook (
|
4
|
+
klipbook (4.0.0)
|
5
5
|
commander
|
6
|
-
mechanize
|
7
6
|
|
8
7
|
GEM
|
9
8
|
remote: https://rubygems.org/
|
10
9
|
specs:
|
11
|
-
aruba (0.14.
|
12
|
-
childprocess (
|
10
|
+
aruba (0.14.6)
|
11
|
+
childprocess (>= 0.6.3, < 0.10.0)
|
13
12
|
contracts (~> 0.9)
|
14
13
|
cucumber (>= 1.3.19)
|
15
14
|
ffi (~> 1.9.10)
|
16
15
|
rspec-expectations (>= 2.99)
|
17
16
|
thor (~> 0.19)
|
18
|
-
|
19
|
-
|
20
|
-
|
17
|
+
backports (3.11.4)
|
18
|
+
builder (3.2.3)
|
19
|
+
byebug (10.0.2)
|
20
|
+
childprocess (0.9.0)
|
21
21
|
ffi (~> 1.0, >= 1.0.11)
|
22
|
-
coderay (1.1.
|
23
|
-
commander (4.4.
|
24
|
-
highline (~>
|
25
|
-
contracts (0.
|
26
|
-
cucumber (
|
22
|
+
coderay (1.1.2)
|
23
|
+
commander (4.4.7)
|
24
|
+
highline (~> 2.0.0)
|
25
|
+
contracts (0.16.0)
|
26
|
+
cucumber (3.1.2)
|
27
27
|
builder (>= 2.1.2)
|
28
|
-
cucumber-core (~>
|
28
|
+
cucumber-core (~> 3.2.0)
|
29
|
+
cucumber-expressions (~> 6.0.1)
|
29
30
|
cucumber-wire (~> 0.0.1)
|
30
|
-
diff-lcs (
|
31
|
-
gherkin (~>
|
31
|
+
diff-lcs (~> 1.3)
|
32
|
+
gherkin (~> 5.1.0)
|
32
33
|
multi_json (>= 1.7.5, < 2.0)
|
33
34
|
multi_test (>= 0.1.2)
|
34
|
-
cucumber-core (
|
35
|
-
|
35
|
+
cucumber-core (3.2.1)
|
36
|
+
backports (>= 3.8.0)
|
37
|
+
cucumber-tag_expressions (~> 1.1.0)
|
38
|
+
gherkin (~> 5.0)
|
39
|
+
cucumber-expressions (6.0.1)
|
40
|
+
cucumber-tag_expressions (1.1.1)
|
36
41
|
cucumber-wire (0.0.1)
|
37
|
-
diff-lcs (1.
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
http-cookie (1.0.2)
|
44
|
-
domain_name (~> 0.5)
|
45
|
-
mechanize (2.7.5)
|
46
|
-
domain_name (~> 0.5, >= 0.5.1)
|
47
|
-
http-cookie (~> 1.0)
|
48
|
-
mime-types (>= 1.17.2)
|
49
|
-
net-http-digest_auth (~> 1.1, >= 1.1.1)
|
50
|
-
net-http-persistent (~> 2.5, >= 2.5.2)
|
51
|
-
nokogiri (~> 1.6)
|
52
|
-
ntlm-http (~> 0.1, >= 0.1.1)
|
53
|
-
webrobots (>= 0.0.9, < 0.2)
|
54
|
-
method_source (0.8.2)
|
55
|
-
mime-types (3.1)
|
56
|
-
mime-types-data (~> 3.2015)
|
57
|
-
mime-types-data (3.2016.0521)
|
58
|
-
mini_portile2 (2.1.0)
|
59
|
-
multi_json (1.12.1)
|
42
|
+
diff-lcs (1.3)
|
43
|
+
ffi (1.9.25)
|
44
|
+
gherkin (5.1.0)
|
45
|
+
highline (2.0.0)
|
46
|
+
method_source (0.9.1)
|
47
|
+
multi_json (1.13.1)
|
60
48
|
multi_test (0.1.2)
|
61
|
-
|
62
|
-
net-http-persistent (2.9.4)
|
63
|
-
nokogiri (1.6.8)
|
64
|
-
mini_portile2 (~> 2.1.0)
|
65
|
-
pkg-config (~> 1.1.7)
|
66
|
-
ntlm-http (0.1.1)
|
67
|
-
pkg-config (1.1.7)
|
68
|
-
pry (0.10.4)
|
49
|
+
pry (0.12.0)
|
69
50
|
coderay (~> 1.1.0)
|
70
|
-
method_source (~> 0.
|
71
|
-
|
72
|
-
|
73
|
-
byebug (~> 9.0)
|
51
|
+
method_source (~> 0.9.0)
|
52
|
+
pry-byebug (3.6.0)
|
53
|
+
byebug (~> 10.0)
|
74
54
|
pry (~> 0.10)
|
75
|
-
rake (
|
76
|
-
rspec (3.
|
77
|
-
rspec-core (~> 3.
|
78
|
-
rspec-expectations (~> 3.
|
79
|
-
rspec-mocks (~> 3.
|
80
|
-
rspec-core (3.
|
81
|
-
rspec-support (~> 3.
|
82
|
-
rspec-expectations (3.
|
55
|
+
rake (12.3.1)
|
56
|
+
rspec (3.8.0)
|
57
|
+
rspec-core (~> 3.8.0)
|
58
|
+
rspec-expectations (~> 3.8.0)
|
59
|
+
rspec-mocks (~> 3.8.0)
|
60
|
+
rspec-core (3.8.0)
|
61
|
+
rspec-support (~> 3.8.0)
|
62
|
+
rspec-expectations (3.8.2)
|
83
63
|
diff-lcs (>= 1.2.0, < 2.0)
|
84
|
-
rspec-support (~> 3.
|
85
|
-
rspec-mocks (3.
|
64
|
+
rspec-support (~> 3.8.0)
|
65
|
+
rspec-mocks (3.8.0)
|
86
66
|
diff-lcs (>= 1.2.0, < 2.0)
|
87
|
-
rspec-support (~> 3.
|
88
|
-
rspec-support (3.
|
89
|
-
|
90
|
-
thor (0.19.1)
|
91
|
-
unf (0.1.4)
|
92
|
-
unf_ext
|
93
|
-
unf_ext (0.0.7.2)
|
94
|
-
webrobots (0.1.2)
|
67
|
+
rspec-support (~> 3.8.0)
|
68
|
+
rspec-support (3.8.0)
|
69
|
+
thor (0.20.0)
|
95
70
|
|
96
71
|
PLATFORMS
|
97
72
|
ruby
|
@@ -106,4 +81,4 @@ DEPENDENCIES
|
|
106
81
|
rspec
|
107
82
|
|
108
83
|
BUNDLED WITH
|
109
|
-
1.
|
84
|
+
1.16.4
|
data/README.md
CHANGED
@@ -12,13 +12,7 @@ of a clippings file" />
|
|
12
12
|
|
13
13
|
## What sources does it support?
|
14
14
|
|
15
|
-
Klipbook can take your highlights from a clippings file off a physical Kindle device
|
16
|
-
|
17
|
-
### When should I use the file and when should I use the site?
|
18
|
-
|
19
|
-
If your books are purchased from Amazon directly then use the site. The site is the canonical source and will contain all of your notes and highlights from across all of your Kindle devices and applications. Currently the Amazon highlights site only shows clippings for books you've purchased on Amazon.
|
20
|
-
|
21
|
-
If your books are personal documents and you've read them on your Kindle device itself then you should use the file from that device as a source i.e. your Kindle device only keeps clippings that you make directly on it.
|
15
|
+
Klipbook can take your highlights from a clippings file off a physical Kindle device.
|
22
16
|
|
23
17
|
## How does it work?
|
24
18
|
|
@@ -37,19 +31,9 @@ Copy your clippings file (called "My Clippings.txt" on a 3rd generation Kindle)
|
|
37
31
|
Then specify the path to your clippings via:
|
38
32
|
|
39
33
|
```sh
|
40
|
-
$ klipbook --from-file "My Clippings.txt"
|
41
|
-
```
|
42
|
-
|
43
|
-
### Reading from the highlights site
|
44
|
-
|
45
|
-
Specify your Amazon credentials to klipbook and it will scrape the site and output a clippings file.
|
46
|
-
|
47
|
-
```sh
|
48
|
-
$ klipbook --from-site my-username@blah.com:my-password
|
34
|
+
$ klipbook list --from-file "My Clippings.txt"
|
49
35
|
```
|
50
36
|
|
51
|
-
Note that the scraping requires a network connection (obviously) and can take a while so please be patient.
|
52
|
-
|
53
37
|
## List
|
54
38
|
|
55
39
|
The `list` command lists the books available in the specified source.
|
@@ -79,7 +63,7 @@ Klipbook will not overwrite an existing file by default. You can change this with
|
|
79
63
|
You can export clippings into a pretty html file for each book.
|
80
64
|
|
81
65
|
```sh
|
82
|
-
$ klipbook export
|
66
|
+
$ klipbook export --from-file "My Clippings.txt" --format html
|
83
67
|
```
|
84
68
|
|
85
69
|
### Export to JSON
|
@@ -87,7 +71,7 @@ $ klipbook export ---from-file "My Clippings.txt" --output-format html
|
|
87
71
|
You can export clippings into a JSON file for each book.
|
88
72
|
|
89
73
|
```sh
|
90
|
-
$ klipbook export --from-file "My Clippings.txt" --
|
74
|
+
$ klipbook export --from-file "My Clippings.txt" --format json
|
91
75
|
```
|
92
76
|
|
93
77
|
### Export to Markdown
|
@@ -95,7 +79,7 @@ $ klipbook export --from-file "My Clippings.txt" --output-format json
|
|
95
79
|
You can export clippings into a markdown file for each book.
|
96
80
|
|
97
81
|
```sh
|
98
|
-
$ klipbook export --from-file "My Clippings.txt" --
|
82
|
+
$ klipbook export --from-file "My Clippings.txt" --format markdown
|
99
83
|
```
|
100
84
|
|
101
85
|
## Set defaults
|
@@ -107,7 +91,6 @@ This is a YAML file and you can specify default values for the source and the ou
|
|
107
91
|
```sh
|
108
92
|
$ cat ~/.klipbookrc
|
109
93
|
|
110
|
-
:from_site: my-username@blah.com:my-password
|
111
94
|
:output_dir: /path/to/my/default/output/directory
|
112
95
|
```
|
113
96
|
|
@@ -127,7 +110,7 @@ Klipbook has been tested on clippings files from 3rd generation Kindles and the
|
|
127
110
|
|
128
111
|
## Tested platforms
|
129
112
|
|
130
|
-
Klipbook has been tested on Mac OS
|
113
|
+
Klipbook has been tested on Mac OS High Sierra using Ruby 2.5.1.
|
131
114
|
|
132
115
|
## Contributing to Klipbook
|
133
116
|
|
@@ -135,4 +118,4 @@ Fork the project on [Github](https://github.com/grassdog/klipbook), add tests fo
|
|
135
118
|
|
136
119
|
## Copyright
|
137
120
|
|
138
|
-
Copyright (c)
|
121
|
+
Copyright (c) 2018 Ray Grasso. See LICENSE.txt for further details.
|
data/Rakefile
CHANGED
@@ -9,10 +9,6 @@ desc 'Run specs'
|
|
9
9
|
RSpec::Core::RakeTask.new(:spec)
|
10
10
|
|
11
11
|
Cucumber::Rake::Task.new(:features) do |t|
|
12
|
-
t.cucumber_opts = '--format pretty --tags ~@slow'
|
13
|
-
end
|
14
|
-
|
15
|
-
Cucumber::Rake::Task.new(:allfeatures) do |t|
|
16
12
|
t.cucumber_opts = '--format pretty'
|
17
13
|
end
|
18
14
|
|
data/klipbook.gemspec
CHANGED
@@ -28,7 +28,6 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.require_paths = ["lib"]
|
29
29
|
|
30
30
|
spec.add_dependency "commander"
|
31
|
-
spec.add_dependency "mechanize"
|
32
31
|
spec.add_development_dependency "bundler"
|
33
32
|
spec.add_development_dependency "rake"
|
34
33
|
spec.add_development_dependency "rspec"
|
data/lib/klipbook.rb
CHANGED
data/lib/klipbook/cli.rb
CHANGED
@@ -13,19 +13,17 @@ module Klipbook
|
|
13
13
|
program :version, Klipbook::VERSION
|
14
14
|
program :description, "Klipbook exports the clippings you've saved on your Kindle into JSON, Markdown, or pretty HTML"
|
15
15
|
|
16
|
-
program :help, 'Source', "You must specify
|
16
|
+
program :help, 'Source', "You must specify `--from-file` as an input."
|
17
17
|
program :help, 'Config', "Note that command options can be stored in a file called ~/.klipbookrc. This file is YAML formatted and options should be snake case e.g.\n\n" +
|
18
|
-
":from_site: my-kindle-user@blah.com:my-kindle-password\n" +
|
19
18
|
":output_dir: ~/my/default/output/directory\n"
|
20
19
|
|
21
20
|
default_command :help
|
22
21
|
|
23
22
|
command :list do |c|
|
24
23
|
c.syntax = "klipbook list"
|
25
|
-
c.description = "List the books
|
24
|
+
c.description = "List the books in the clippings file"
|
26
25
|
|
27
26
|
c.option '--from-file FILE', String, "Input clippings file"
|
28
|
-
c.option '--from-site username:password', String, "Credentials for Kindle highlights site"
|
29
27
|
c.option '-c', '--count COUNT', Integer, "Maximum number of books to list (default is #{Config::DEFAULT_MAXBOOKS})"
|
30
28
|
|
31
29
|
c.action do |_args, options|
|
@@ -40,7 +38,6 @@ module Klipbook
|
|
40
38
|
c.description = 'Export book clippings'
|
41
39
|
|
42
40
|
c.option '--from-file FILE', String, "Input clippings file"
|
43
|
-
c.option '--from-site username:password', String, "Credentials for Kindle highlights site"
|
44
41
|
c.option '-c', '--count COUNT', Integer, "Maximum number of books to list (default is #{Config::DEFAULT_MAXBOOKS})"
|
45
42
|
c.option '--format FORMAT', "Format to export in [html, markdown, or json]"
|
46
43
|
c.option '--output-dir DIRECTORY', "Directory to export files to (default pwd)"
|
@@ -59,9 +56,7 @@ module Klipbook
|
|
59
56
|
private
|
60
57
|
|
61
58
|
def merge_config(options, config)
|
62
|
-
|
63
|
-
[:from_site, :from_file].each { |key| config.delete(key) }
|
64
|
-
end
|
59
|
+
config.delete(:from_file) if options.from_file
|
65
60
|
|
66
61
|
options.default config
|
67
62
|
end
|
@@ -22,8 +22,8 @@ module Klipbook
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def exit_unless_valid_source(options)
|
25
|
-
unless options.from_file
|
26
|
-
logger.error "Error: You must specify
|
25
|
+
unless options.from_file
|
26
|
+
logger.error "Error: You must specify `--from-file` as an input."
|
27
27
|
exit 127
|
28
28
|
end
|
29
29
|
end
|
@@ -4,24 +4,11 @@ module Klipbook
|
|
4
4
|
def self.build(options)
|
5
5
|
if options.from_file
|
6
6
|
file_source(options.from_file, options.count)
|
7
|
-
elsif options.from_site
|
8
|
-
site_source(options.from_site, options.count)
|
9
7
|
else
|
10
8
|
raise "Unknown source type"
|
11
9
|
end
|
12
10
|
end
|
13
11
|
|
14
|
-
def self.site_source(credentials, max_books)
|
15
|
-
unless credentials =~ /(.+):(.+)/
|
16
|
-
logger.error "Error: your credentials need to be in username:password format."
|
17
|
-
exit 127
|
18
|
-
end
|
19
|
-
|
20
|
-
username = $1
|
21
|
-
password = $2
|
22
|
-
Sources::AmazonSite::SiteScraper.new(username, password, max_books)
|
23
|
-
end
|
24
|
-
|
25
12
|
def self.file_source(file, max_books)
|
26
13
|
Sources::KindleDevice::File.new(File.read(file), max_books)
|
27
14
|
end
|
data/lib/klipbook/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: klipbook
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Grasso
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: commander
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: mechanize
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: bundler
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -133,7 +119,6 @@ extra_rdoc_files: []
|
|
133
119
|
files:
|
134
120
|
- ".gitignore"
|
135
121
|
- ".rspec"
|
136
|
-
- ".ruby-version"
|
137
122
|
- ".travis.yml"
|
138
123
|
- CHANGELOG.txt
|
139
124
|
- Gemfile
|
@@ -157,8 +142,6 @@ files:
|
|
157
142
|
- lib/klipbook/commands/list.rb
|
158
143
|
- lib/klipbook/config.rb
|
159
144
|
- lib/klipbook/logger.rb
|
160
|
-
- lib/klipbook/sources/amazon_site/book_scraper.rb
|
161
|
-
- lib/klipbook/sources/amazon_site/site_scraper.rb
|
162
145
|
- lib/klipbook/sources/book.rb
|
163
146
|
- lib/klipbook/sources/clipping.rb
|
164
147
|
- lib/klipbook/sources/kindle_device/entry.rb
|
@@ -190,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
190
173
|
version: '0'
|
191
174
|
requirements: []
|
192
175
|
rubyforge_project:
|
193
|
-
rubygems_version: 2.
|
176
|
+
rubygems_version: 2.7.6
|
194
177
|
signing_key:
|
195
178
|
specification_version: 4
|
196
179
|
summary: Klipbook creates a nice html summary of the clippings you've created on your
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
2.3.1
|
@@ -1,65 +0,0 @@
|
|
1
|
-
module Klipbook::Sources
|
2
|
-
module AmazonSite
|
3
|
-
class BookScraper
|
4
|
-
|
5
|
-
def scrape_book(page)
|
6
|
-
page.search(".//div[@class='bookMain yourHighlightsHeader']").map { |element| build_book(page, element) }
|
7
|
-
end
|
8
|
-
|
9
|
-
private
|
10
|
-
|
11
|
-
def build_book(page, element)
|
12
|
-
Klipbook::Book.new.tap do |b|
|
13
|
-
b.asin = element.attribute("id").value.gsub(/_[0-9]+$/, "")
|
14
|
-
b.author = element.xpath("span[@class='author']").text.gsub("\n", "").gsub(" by ", "").strip
|
15
|
-
b.title = element.xpath("span/a").text
|
16
|
-
b.last_update = extract_last_update(element.xpath("div[@class='lastHighlighted']").text)
|
17
|
-
b.clippings = scrape_clippings(page)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def extract_last_update(text)
|
22
|
-
text = text.gsub('Last annotated on ', '')
|
23
|
-
DateTime.parse(text)
|
24
|
-
end
|
25
|
-
|
26
|
-
def scrape_clippings(page)
|
27
|
-
page.search(".//div[@class='highlightRow yourHighlight']").map { |element| build_clipping(element) }.flatten
|
28
|
-
end
|
29
|
-
|
30
|
-
def build_clipping(element)
|
31
|
-
location = extract_location(element)
|
32
|
-
annotation_id = element.xpath("form/input[@id='annotation_id']").attribute("value").value
|
33
|
-
note_text = element.xpath("p/span[@class='noteContent']").text
|
34
|
-
|
35
|
-
highlight = Klipbook::Clipping.new.tap do |c|
|
36
|
-
c.annotation_id = annotation_id
|
37
|
-
c.text = element.xpath("span[@class='highlight']").text
|
38
|
-
c.type = :highlight
|
39
|
-
c.location = location
|
40
|
-
end
|
41
|
-
|
42
|
-
if note_text.blank?
|
43
|
-
highlight
|
44
|
-
else
|
45
|
-
note = Klipbook::Clipping.new.tap do |c|
|
46
|
-
c.annotation_id = annotation_id
|
47
|
-
c.text = note_text
|
48
|
-
c.type = :note
|
49
|
-
c.location = location
|
50
|
-
end
|
51
|
-
|
52
|
-
[highlight, note]
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def extract_location(element)
|
57
|
-
if element.xpath("a[@class='k4pcReadMore readMore linkOut']").attribute("href").value =~ /location=([0-9]+)$/
|
58
|
-
$1.to_i
|
59
|
-
else
|
60
|
-
0
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
@@ -1,80 +0,0 @@
|
|
1
|
-
require 'mechanize'
|
2
|
-
|
3
|
-
module Klipbook::Sources
|
4
|
-
module AmazonSite
|
5
|
-
class SiteScraper
|
6
|
-
def initialize(username, password, max_books,
|
7
|
-
book_scraper=Klipbook::Sources::AmazonSite::BookScraper.new,
|
8
|
-
message_stream=$stdout)
|
9
|
-
@username = username
|
10
|
-
@password = password
|
11
|
-
@max_books = max_books
|
12
|
-
@message_stream = message_stream
|
13
|
-
@agent = Mechanize.new do |a|
|
14
|
-
a.user_agent_alias = 'Mac Safari'
|
15
|
-
end
|
16
|
-
@book_scraper = book_scraper
|
17
|
-
end
|
18
|
-
|
19
|
-
def books
|
20
|
-
@books ||= fetch_up_to_max_books
|
21
|
-
end
|
22
|
-
|
23
|
-
private
|
24
|
-
|
25
|
-
def fetch_up_to_max_books
|
26
|
-
scrape_books(fetch_first_books_page)
|
27
|
-
end
|
28
|
-
|
29
|
-
def fetch_first_books_page
|
30
|
-
welcome_page = get_welcome_page
|
31
|
-
|
32
|
-
raise 'Invalid Username or password' unless welcome_page.title == 'Amazon Kindle: Home'
|
33
|
-
|
34
|
-
@agent.click(welcome_page.link_with(:text => /Your Highlights/))
|
35
|
-
end
|
36
|
-
|
37
|
-
def get_welcome_page
|
38
|
-
@message_stream.puts 'Logging into site'
|
39
|
-
|
40
|
-
begin
|
41
|
-
page = @agent.get("https://www.amazon.com/ap/signin?openid.return_to=https%3A%2F%2Fkindle.amazon.com%3A443%2Fauthenticate%2Flogin_callback%3Fwctx%3D%252F&pageId=amzn_kindle&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.pape.max_auth_age=0&openid.assoc_handle=amzn_kindle&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select")
|
42
|
-
rescue
|
43
|
-
raise 'Could not connect to Amazon Kindle Site'
|
44
|
-
end
|
45
|
-
|
46
|
-
login_form = page.form('signIn').tap do |f|
|
47
|
-
f.email = @username
|
48
|
-
f.password = @password
|
49
|
-
end
|
50
|
-
|
51
|
-
@agent.submit(login_form)
|
52
|
-
end
|
53
|
-
|
54
|
-
def scrape_books(page)
|
55
|
-
books = []
|
56
|
-
@message_stream.print 'Fetching books '
|
57
|
-
|
58
|
-
@max_books.times do |count|
|
59
|
-
@message_stream.print '.'
|
60
|
-
books << @book_scraper.scrape_book(page)
|
61
|
-
page = get_next_page(page)
|
62
|
-
break unless page
|
63
|
-
end
|
64
|
-
|
65
|
-
puts ' Done!'
|
66
|
-
|
67
|
-
books.flatten
|
68
|
-
end
|
69
|
-
|
70
|
-
def get_next_page(page)
|
71
|
-
next_book_link = page.link_with(:dom_id => "nextBookLink")
|
72
|
-
if next_book_link
|
73
|
-
@agent.click(next_book_link)
|
74
|
-
else
|
75
|
-
nil
|
76
|
-
end
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|