klipbook 0.3.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. data/Gemfile +6 -5
  2. data/Gemfile.lock +62 -25
  3. data/Guardfile +1 -2
  4. data/README.md +66 -18
  5. data/Rakefile +5 -1
  6. data/bin/klipbook +85 -1
  7. data/example.png +0 -0
  8. data/features/collate.feature +51 -0
  9. data/features/fixtures/clippings-for-three-books.txt +105 -0
  10. data/features/list.feature +31 -0
  11. data/features/step_definitions/collate_steps.rb +61 -0
  12. data/features/step_definitions/list_steps.rb +15 -0
  13. data/features/support/env.rb +5 -1
  14. data/klipbook.gemspec +49 -32
  15. data/lib/klipbook/book.rb +18 -0
  16. data/lib/klipbook/clipping.rb +4 -10
  17. data/lib/klipbook/collator.rb +17 -0
  18. data/lib/klipbook/config.rb +22 -0
  19. data/lib/klipbook/fetcher.rb +29 -0
  20. data/lib/klipbook/invalid_source_error.rb +12 -0
  21. data/lib/klipbook/output/book_helpers.rb +12 -0
  22. data/lib/klipbook/{book_summary.erb → output/html_book_summary.erb} +65 -11
  23. data/lib/klipbook/output/html_summary_writer.rb +42 -0
  24. data/lib/klipbook/printer.rb +18 -0
  25. data/lib/klipbook/sources/amazon_site/book_scraper.rb +67 -0
  26. data/lib/klipbook/sources/amazon_site/scraper.rb +78 -0
  27. data/lib/klipbook/sources/kindle_device/entry.rb +11 -0
  28. data/lib/klipbook/sources/kindle_device/entry_parser.rb +85 -0
  29. data/lib/klipbook/sources/kindle_device/file.rb +57 -0
  30. data/lib/klipbook/sources/kindle_device/file_parser.rb +33 -0
  31. data/lib/klipbook/version.rb +1 -1
  32. data/lib/klipbook.rb +18 -5
  33. data/spec/lib/klipbook/book_spec.rb +33 -0
  34. data/spec/lib/klipbook/collator_spec.rb +40 -0
  35. data/spec/lib/klipbook/fetcher_spec.rb +81 -0
  36. data/spec/lib/klipbook/output/html_summary_writer_spec.rb +90 -0
  37. data/spec/lib/klipbook/printer_spec.rb +45 -0
  38. data/spec/lib/klipbook/sources/kindle_device/entry_parser_spec.rb +275 -0
  39. data/spec/lib/klipbook/sources/kindle_device/file_parser_spec.rb +68 -0
  40. data/spec/lib/klipbook/sources/kindle_device/file_spec.rb +163 -0
  41. metadata +158 -58
  42. data/features/list_books.feature +0 -23
  43. data/features/print_book_summary.feature +0 -10
  44. data/features/step_definitions/klipbook_steps.rb +0 -87
  45. data/lib/klipbook/book_summary.rb +0 -35
  46. data/lib/klipbook/cli.rb +0 -49
  47. data/lib/klipbook/clippings_file.rb +0 -50
  48. data/lib/klipbook/clippings_parser.rb +0 -98
  49. data/lib/klipbook/runner.rb +0 -29
  50. data/spec/lib/klipbook/book_summary_spec.rb +0 -30
  51. data/spec/lib/klipbook/clipping_spec.rb +0 -17
  52. data/spec/lib/klipbook/clippings_file_spec.rb +0 -60
  53. data/spec/lib/klipbook/clippings_parser_spec.rb +0 -367
  54. data/spec/lib/klipbook/runner_spec.rb +0 -87
@@ -1,8 +1,12 @@
1
1
  require 'rubygems'
2
2
  require 'bundler/setup'
3
3
 
4
+ require 'aruba/cucumber'
5
+
4
6
  $LOAD_PATH << File.expand_path('../../../lib', __FILE__)
5
7
  require 'klipbook'
6
8
  require 'fileutils'
7
9
 
8
- TEST_DIR = File.expand_path('../../../tmp/test', __FILE__)
10
+ Before('@slow') do
11
+ @aruba_io_wait_seconds = 40
12
+ end
data/klipbook.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "klipbook"
8
- s.version = "0.3.0"
8
+ s.version = "1.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ray Grasso"]
12
- s.date = "2011-12-29"
12
+ s.date = "2012-09-08"
13
13
  s.description = "Process your Kindle clippings file to generate a nicely formatted compilation of the clippings of the books you've read"
14
14
  s.email = "ray.grasso@gmail.com"
15
15
  s.executables = ["klipbook"]
@@ -29,26 +29,40 @@ Gem::Specification.new do |s|
29
29
  "Rakefile",
30
30
  "bin/klipbook",
31
31
  "example.png",
32
- "features/list_books.feature",
33
- "features/print_book_summary.feature",
34
- "features/step_definitions/klipbook_steps.rb",
32
+ "features/collate.feature",
33
+ "features/fixtures/clippings-for-three-books.txt",
34
+ "features/list.feature",
35
+ "features/step_definitions/collate_steps.rb",
36
+ "features/step_definitions/list_steps.rb",
35
37
  "features/support/env.rb",
36
38
  "klipbook.gemspec",
37
39
  "lib/klipbook.rb",
38
40
  "lib/klipbook/blank.rb",
39
- "lib/klipbook/book_summary.erb",
40
- "lib/klipbook/book_summary.rb",
41
- "lib/klipbook/cli.rb",
41
+ "lib/klipbook/book.rb",
42
42
  "lib/klipbook/clipping.rb",
43
- "lib/klipbook/clippings_file.rb",
44
- "lib/klipbook/clippings_parser.rb",
45
- "lib/klipbook/runner.rb",
43
+ "lib/klipbook/collator.rb",
44
+ "lib/klipbook/config.rb",
45
+ "lib/klipbook/fetcher.rb",
46
+ "lib/klipbook/invalid_source_error.rb",
47
+ "lib/klipbook/output/book_helpers.rb",
48
+ "lib/klipbook/output/html_book_summary.erb",
49
+ "lib/klipbook/output/html_summary_writer.rb",
50
+ "lib/klipbook/printer.rb",
51
+ "lib/klipbook/sources/amazon_site/book_scraper.rb",
52
+ "lib/klipbook/sources/amazon_site/scraper.rb",
53
+ "lib/klipbook/sources/kindle_device/entry.rb",
54
+ "lib/klipbook/sources/kindle_device/entry_parser.rb",
55
+ "lib/klipbook/sources/kindle_device/file.rb",
56
+ "lib/klipbook/sources/kindle_device/file_parser.rb",
46
57
  "lib/klipbook/version.rb",
47
- "spec/lib/klipbook/book_summary_spec.rb",
48
- "spec/lib/klipbook/clipping_spec.rb",
49
- "spec/lib/klipbook/clippings_file_spec.rb",
50
- "spec/lib/klipbook/clippings_parser_spec.rb",
51
- "spec/lib/klipbook/runner_spec.rb",
58
+ "spec/lib/klipbook/book_spec.rb",
59
+ "spec/lib/klipbook/collator_spec.rb",
60
+ "spec/lib/klipbook/fetcher_spec.rb",
61
+ "spec/lib/klipbook/output/html_summary_writer_spec.rb",
62
+ "spec/lib/klipbook/printer_spec.rb",
63
+ "spec/lib/klipbook/sources/kindle_device/entry_parser_spec.rb",
64
+ "spec/lib/klipbook/sources/kindle_device/file_parser_spec.rb",
65
+ "spec/lib/klipbook/sources/kindle_device/file_spec.rb",
52
66
  "spec/spec_helper.rb",
53
67
  "spec/support/rspec2.rb",
54
68
  "spec/support/with_rr.rb"
@@ -56,58 +70,61 @@ Gem::Specification.new do |s|
56
70
  s.homepage = "https://github.com/grassdog/klipbook"
57
71
  s.licenses = ["MIT"]
58
72
  s.require_paths = ["lib"]
59
- s.rubygems_version = "1.8.10"
73
+ s.rubygems_version = "1.8.23"
60
74
  s.summary = "Klipbook creates a nice html summary of the clippings you've created on your Kindle."
61
75
 
62
76
  if s.respond_to? :specification_version then
63
77
  s.specification_version = 3
64
78
 
65
79
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
66
- s.add_runtime_dependency(%q<thor>, [">= 0"])
80
+ s.add_runtime_dependency(%q<gli>, [">= 0"])
81
+ s.add_runtime_dependency(%q<mechanize>, [">= 0"])
82
+ s.add_runtime_dependency(%q<rainbow>, [">= 0"])
67
83
  s.add_development_dependency(%q<rspec>, [">= 0"])
68
84
  s.add_development_dependency(%q<rr>, [">= 0"])
69
85
  s.add_development_dependency(%q<bundler>, [">= 0"])
70
86
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
71
87
  s.add_development_dependency(%q<rcov>, [">= 0"])
88
+ s.add_development_dependency(%q<pry>, [">= 0"])
72
89
  s.add_development_dependency(%q<cucumber>, [">= 0"])
90
+ s.add_development_dependency(%q<aruba>, [">= 0"])
73
91
  s.add_development_dependency(%q<guard>, [">= 0"])
74
92
  s.add_development_dependency(%q<guard-rspec>, [">= 0"])
75
93
  s.add_development_dependency(%q<guard-cucumber>, [">= 0"])
76
- s.add_development_dependency(%q<rb-inotify>, [">= 0"])
77
- s.add_development_dependency(%q<rb-fsevent>, [">= 0"])
78
- s.add_development_dependency(%q<rb-fchange>, [">= 0"])
79
- s.add_development_dependency(%q<growl_notify>, [">= 0"])
94
+ s.add_development_dependency(%q<terminal-notifier-guard>, [">= 0"])
80
95
  else
81
- s.add_dependency(%q<thor>, [">= 0"])
96
+ s.add_dependency(%q<gli>, [">= 0"])
97
+ s.add_dependency(%q<mechanize>, [">= 0"])
98
+ s.add_dependency(%q<rainbow>, [">= 0"])
82
99
  s.add_dependency(%q<rspec>, [">= 0"])
83
100
  s.add_dependency(%q<rr>, [">= 0"])
84
101
  s.add_dependency(%q<bundler>, [">= 0"])
85
102
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
86
103
  s.add_dependency(%q<rcov>, [">= 0"])
104
+ s.add_dependency(%q<pry>, [">= 0"])
87
105
  s.add_dependency(%q<cucumber>, [">= 0"])
106
+ s.add_dependency(%q<aruba>, [">= 0"])
88
107
  s.add_dependency(%q<guard>, [">= 0"])
89
108
  s.add_dependency(%q<guard-rspec>, [">= 0"])
90
109
  s.add_dependency(%q<guard-cucumber>, [">= 0"])
91
- s.add_dependency(%q<rb-inotify>, [">= 0"])
92
- s.add_dependency(%q<rb-fsevent>, [">= 0"])
93
- s.add_dependency(%q<rb-fchange>, [">= 0"])
94
- s.add_dependency(%q<growl_notify>, [">= 0"])
110
+ s.add_dependency(%q<terminal-notifier-guard>, [">= 0"])
95
111
  end
96
112
  else
97
- s.add_dependency(%q<thor>, [">= 0"])
113
+ s.add_dependency(%q<gli>, [">= 0"])
114
+ s.add_dependency(%q<mechanize>, [">= 0"])
115
+ s.add_dependency(%q<rainbow>, [">= 0"])
98
116
  s.add_dependency(%q<rspec>, [">= 0"])
99
117
  s.add_dependency(%q<rr>, [">= 0"])
100
118
  s.add_dependency(%q<bundler>, [">= 0"])
101
119
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
102
120
  s.add_dependency(%q<rcov>, [">= 0"])
121
+ s.add_dependency(%q<pry>, [">= 0"])
103
122
  s.add_dependency(%q<cucumber>, [">= 0"])
123
+ s.add_dependency(%q<aruba>, [">= 0"])
104
124
  s.add_dependency(%q<guard>, [">= 0"])
105
125
  s.add_dependency(%q<guard-rspec>, [">= 0"])
106
126
  s.add_dependency(%q<guard-cucumber>, [">= 0"])
107
- s.add_dependency(%q<rb-inotify>, [">= 0"])
108
- s.add_dependency(%q<rb-fsevent>, [">= 0"])
109
- s.add_dependency(%q<rb-fchange>, [">= 0"])
110
- s.add_dependency(%q<growl_notify>, [">= 0"])
127
+ s.add_dependency(%q<terminal-notifier-guard>, [">= 0"])
111
128
  end
112
129
  end
113
130
 
@@ -0,0 +1,18 @@
module Klipbook
  # A book together with the clippings collected for it.
  #
  # Every attribute is a plain read/write accessor that starts out as nil; the
  # optional block accepted by +new+ is the intended way to populate them.
  class Book
    attr_accessor :asin, :author, :title, :last_update, :clippings

    # Builds an empty book and yields it to the block (when one is given) so
    # the caller can assign attributes in place.
    def initialize
      yield self if block_given?
    end

    # Human readable name of the book: "<title> by <author>", or only the
    # title when no author is known.
    #
    # A nil, empty or whitespace-only author is treated as "no author" so the
    # result never ends in a dangling " by ".
    #
    # @return [String]
    def title_and_author
      return title.to_s if author.to_s.strip.empty?

      "#{title} by #{author}"
    end

    # Returns a Binding whose +self+ is this book, so that code evaluated
    # against it (e.g. a template) can call the book's accessors directly.
    #
    # @return [Binding]
    def get_binding
      binding
    end
  end
end
@@ -1,15 +1,9 @@
1
- require 'ostruct'
2
- require 'date'
3
-
4
1
  module Klipbook
5
- class Clipping < OpenStruct
6
- def initialize(attributes)
7
- super(attributes)
8
- self.added_on = DateTime.strptime(self.added_on, '%A, %B %d, %Y, %I:%M %p') if self.added_on
9
- end
2
+ class Clipping
3
+ attr_accessor :annotation_id, :text, :location, :type, :page
10
4
 
11
- def <=>(other)
12
- (self.location || 0) <=> (other.location || 0)
5
+ def initialize
6
+ yield self if block_given?
13
7
  end
14
8
  end
15
9
  end
@@ -0,0 +1,17 @@
module Klipbook
  # Writes a summary for each book in a collection by delegating to a
  # summary writer object.
  class Collator
    # @param books [Enumerable] the books to collate
    # @param summary_writer [#write] object that receives
    #   +write(book, output_dir, force)+ once per book; defaults to a new
    #   Klipbook::Output::HtmlSummaryWriter
    def initialize(books, summary_writer=Klipbook::Output::HtmlSummaryWriter.new)
      @books = books
      @summary_writer = summary_writer
    end

    # Announces the output directory on +message_stream+ and then hands every
    # book to the summary writer.
    #
    # @param output_dir [String] passed through to the writer unchanged
    # @param force passed through to the writer unchanged
    # @param message_stream [#puts] destination for the progress message
    def collate_books(output_dir, force, message_stream=$stdout)
      message_stream.puts "Using output directory: #{output_dir}"

      @books.each { |book| @summary_writer.write(book, output_dir, force) }
    end
  end
end
@@ -0,0 +1,22 @@
module Klipbook
  # Reads settings from a YAML rc file located in the user's home directory
  # (the directory named by the HOME environment variable).
  class Config
    # @param config_file_name [String] file name looked up inside HOME
    def initialize(config_file_name='.klipbookrc')
      @config_file_name = config_file_name
    end

    # Loads the rc file into a fresh hash.
    #
    # @return [Hash] the settings found, or an empty hash when there are none
    def read
      merge_config_from_rc_file({})
    end

    # Merges the contents of the rc file into +config+ (mutating it) and
    # returns it. +config+ is returned untouched when HOME is not set, when
    # the file does not exist, or when the file does not contain a YAML
    # mapping (for example an empty file, for which the parser yields a
    # non-Hash value that cannot be merged).
    #
    # @param config [Hash] hash to merge the file's settings into
    # @return [Hash] the same +config+ object
    def merge_config_from_rc_file(config)
      home = ENV['HOME']
      return config if home.nil?

      config_file = File.join(File.expand_path(home), @config_file_name)
      return config unless File.exist?(config_file)

      # Loaded lazily so YAML is only required when an rc file is present.
      require 'yaml'
      # NOTE(review): YAML.load is kept as in the original; the rc file is
      # user-owned, but confirm it is never sourced from untrusted input.
      loaded = File.open(config_file) { |file| YAML.load(file) }
      config.merge!(loaded) if loaded.is_a?(Hash)

      config
    end
  end
end
@@ -0,0 +1,29 @@
module Klipbook
  # Turns a source specification string into a concrete clippings source and
  # fetches books from it.
  #
  # Two forms of specification are recognised:
  #
  #   file:<path to a clippings file>
  #   site:<username>:<password>
  class Fetcher
    # Anchored so that e.g. a password containing "file:" is not mistaken for
    # a file source.
    FILE_SOURCE = /\Afile:(.+)/
    # The username ends at the first colon; everything after it is the
    # password, which may therefore contain colons itself.
    SITE_SOURCE = /\Asite:([^:]+):(.+)/

    # @param source_spec [String] specification in one of the forms above;
    #   surrounding whitespace is ignored
    # @param max_books passed through unchanged to the source
    # @raise [InvalidSourceError] when the specification is not recognised
    def initialize(source_spec, max_books)
      spec = source_spec.to_s.strip
      raise InvalidSourceError unless valid_source(spec)

      if (file_match = FILE_SOURCE.match(spec))
        # File.read closes the handle; the file content is handed over
        # stripped of leading/trailing whitespace.
        contents = ::File.read(file_match[1])
        @source = Klipbook::Sources::KindleDevice::File.new(contents.strip, max_books)
      elsif (site_match = SITE_SOURCE.match(spec))
        @source = Klipbook::Sources::AmazonSite::Scraper.new(site_match[1], site_match[2], max_books)
      else
        # Reached for specs that pass the coarse check but carry no payload,
        # such as a bare "file:".
        raise InvalidSourceError, "Unrecognised source type. Only 'file' and 'site' are supported"
      end
    end

    # @return whatever the underlying source's +books+ method returns
    def fetch_books
      @source.books
    end

    private

    # Coarse check that the spec starts like one of the supported forms.
    def valid_source(source_spec)
      source_spec =~ /\A(?:file:|site:.+:.+)/
    end
  end
end
@@ -0,0 +1,12 @@
# Raised when a clippings source specification cannot be understood.
# When no message is supplied, a usage hint listing the accepted forms is used.
class InvalidSourceError < RuntimeError
  # Default, multi-line usage hint shown when no explicit message is given.
  DEFAULT_MESSAGE = [
    "Please provide a valid source.",
    "e.g.",
    " file:path/to/my-clippings-file.txt",
    " site:my-kindle-user@blah.com:my-kindle-password"
  ].join("\n").freeze

  # @param msg [String] error message; defaults to DEFAULT_MESSAGE
  def initialize(msg = DEFAULT_MESSAGE)
    super
  end
end
@@ -0,0 +1,12 @@
module Klipbook
  module Output
    # Presentation helpers meant to be mixed into an object that responds to
    # +asin+.
    module BookHelpers
      # Renders a clipping location for HTML output.
      #
      # When the receiver has an ASIN the location becomes a kindle:// link
      # carrying that ASIN and the location; otherwise (nil or empty ASIN) it
      # is rendered as plain "loc N" text, since a link without an ASIN would
      # not identify a book.
      #
      # @param location the location value to display
      # @return [String] an HTML fragment
      def location_html(location)
        return "loc #{location}" if asin.to_s.empty?

        "<a href=\"kindle://book?action=open&asin=#{asin}&location=#{location}\">loc #{location}</a>"
      end
    end
  end
end
@@ -3,7 +3,7 @@
3
3
  <head>
4
4
  <meta charset="utf-8"/>
5
5
  <meta name="generator" content="Klipbook"/>
6
- <title><%= @clippings.first.title %> - Collated Kindle Clippings</title>
6
+ <title><%= title %> - Collated Kindle Clippings</title>
7
7
  <style type="text/css">
8
8
  body {
9
9
  color: #333;
@@ -26,18 +26,44 @@
26
26
 
27
27
  h1 {
28
28
  line-height: 1.1em;
29
+ font-weight: bold;
30
+ }
31
+
32
+ .byline {
33
+ width: 730px;
29
34
  }
30
35
 
31
36
  h2 {
37
+ float: left;
32
38
  line-height: 1.1em;
33
39
  font-size: 1.3em;
34
40
  color: #555;
41
+ font-weight: bold;
35
42
  font-style: italic;
36
43
  }
37
44
 
45
+ h3 {
46
+ float: right;
47
+ font-style: italic;
48
+ font-weight: normal;
49
+ color: #555;
50
+ font-size: 1.1em;
51
+ }
52
+
53
+ .clearfix:before,
54
+ .clearfix:after {
55
+ content: " ";
56
+ display: table;
57
+ }
58
+
59
+ .clearfix:after {
60
+ clear: both;
61
+ }
62
+
38
63
  ul {
39
64
  margin-top: 2em;
40
- width: 43em;
65
+ width: 690px;
66
+ padding-left: 40px;
41
67
  }
42
68
 
43
69
  ul li {
@@ -51,8 +77,8 @@
51
77
 
52
78
  ul li footer {
53
79
  text-align: right;
54
- font-size: .85em;
55
80
  color: #8C8C8C;
81
+ margin-left: 20em;
56
82
  }
57
83
 
58
84
  li.note p {
@@ -60,24 +86,53 @@
60
86
  }
61
87
 
62
88
  footer {
63
- font-size: .85em;
64
- margin-left: 20em;
89
+ font-size: .8em;
65
90
  }
66
91
 
67
92
  footer span {
68
93
  font-style: italic;
69
94
  }
95
+
96
+ footer a {
97
+ text-decoration: underline;
98
+ color: #8C8C8C;
99
+ }
100
+
101
+ footer a, footer a:link, footer a:visited {
102
+ color: #8C8C8C;
103
+ }
104
+
105
+ footer a:hover {
106
+ color: #084ab7;
107
+ }
108
+
109
+ body > footer {
110
+ font-size: .8em;
111
+ margin-left: 0;
112
+ width: 690px;
113
+ text-align: center;
114
+ padding-left: 40px;
115
+ }
70
116
  </style>
71
117
  </head>
72
118
  <body>
73
119
 
74
- <h1><%= @clippings.first.title %></h1>
75
- <% unless @author.blank? %>
120
+ <h1><%= title %></h1>
121
+
122
+ <div class="byline">
123
+ <% unless author.blank? %>
76
124
  <h2>by <%= author %></h2>
77
125
  <% end %>
78
126
 
127
+ <% if last_update %>
128
+ <h3>Last updated: <%= last_update.strftime('%e %B %Y') %></h3>
129
+ <% end %>
130
+ </div>
131
+
132
+ <div class="clearfix"></div>
133
+
79
134
  <ul>
80
- <% @clippings.each do |clipping| %>
135
+ <% clippings.each do |clipping| %>
81
136
  <% unless clipping.text.blank? %>
82
137
  <li class="<%= clipping.type %>">
83
138
  <p>
@@ -85,8 +140,7 @@
85
140
  </p>
86
141
  <footer>
87
142
  <%= clipping.type %>
88
- <% if include_pages && clipping.page %> on page <%= clipping.page %><% end %>
89
- <% if clipping.location %> @ loc <%= clipping.location %><% end %>
143
+ <% if clipping.location %> @ <%= location_html(clipping.location) %><% end %>
90
144
  </footer>
91
145
  </li>
92
146
  <% end %>
@@ -94,7 +148,7 @@
94
148
  </ul>
95
149
 
96
150
  <footer>
97
- Generated by <a href="https://github.com/grassdog/klipbook">Klipbook <%= Klipbook::VERSION %></a> on <span><%= DateTime.now.strftime('%e %b %Y at %l:%M %P') %></span>
151
+ <%= clippings.count %> clippings &bull; Generated by <a href="https://github.com/grassdog/klipbook">Klipbook <%= Klipbook::VERSION %></a> on <span><%= DateTime.now.strftime('%e %b %Y at %l:%M %P') %></span>
98
152
  </footer>
99
153
  </body>
100
154
  </html>
@@ -0,0 +1,42 @@
1
+ require 'erb'
2
+
module Klipbook
  module Output
    # Renders a book through the html_book_summary.erb template that sits
    # next to this file and writes the result into an output directory.
    class HtmlSummaryWriter
      # Characters that must not appear in a single file name: the platform's
      # path separator(s) and NUL.
      UNSAFE_FILENAME_CHARS = Regexp.union(*[::File::SEPARATOR, ::File::ALT_SEPARATOR, "\0"].compact)

      # @param message_stream [#puts] receives the "Skipping"/"Writing" lines
      def initialize(message_stream=$stdout)
        @message_stream = message_stream
      end

      # Writes "<title and author>.html" for +book+ into +output_dir+.
      # An existing file is left alone (and reported as skipped) unless
      # +force+ is truthy.
      #
      # @param book object responding to +title_and_author+ and +get_binding+;
      #   it is extended with BookHelpers as a side effect
      # @param output_dir [String] directory the file is written to
      # @param force when truthy, overwrite an existing file
      def write(book, output_dir, force)
        # Loaded here, as in the original, so the colouring String helpers
        # are only required when a summary is actually written.
        require 'rainbow'

        book.extend Klipbook::Output::BookHelpers

        filename = filename_for_book(book)
        filepath = ::File.join(output_dir, filename)

        # File.exist? replaces File.exists?, which newer Rubies no longer have.
        if !force && ::File.exist?(filepath)
          @message_stream.puts("Skipping ".foreground(:yellow) + filename)
          return
        end

        @message_stream.puts("Writing ".foreground(:green) + filename)
        ::File.open(filepath, 'w') do |f|
          f.write generate_html(book)
        end
      end

      private

      # File name for the book's summary. Path separators (and NUL) coming
      # from the title/author are replaced with "-" so the name always refers
      # to a file directly inside the output directory.
      def filename_for_book(book)
        "#{book.title_and_author}.html".gsub(UNSAFE_FILENAME_CHARS, '-')
      end

      # Evaluates the template with the book's binding, using the '%<>' trim
      # mode.
      def generate_html(book)
        erb = if ERB.instance_method(:initialize).parameters.assoc(:key)
                # Rubies whose ERB accepts keyword options.
                ERB.new(template, trim_mode: '%<>')
              else
                # Older Rubies only know the positional form.
                ERB.new(template, 0, '%<>')
              end

        erb.result(book.get_binding)
      end

      # Template source, read once and cached for later calls.
      def template
        @template ||= ::File.read(::File.join(::File.dirname(__FILE__), 'html_book_summary.erb'))
      end
    end
  end
end
@@ -0,0 +1,18 @@
module Klipbook
  # Prints a numbered list of books.
  class Printer
    # @param books [Array] objects responding to +title_and_author+
    def initialize(books)
      @books = books
    end

    # Writes the list to +output+: a "Book list:" heading followed by one
    # "[n] <title and author>" line per book (numbered from 1), or a single
    # "No books available" line when there are no books.
    #
    # @param output [#puts] destination stream
    def print(output=$stdout)
      if @books.empty?
        output.puts 'No books available'
        return
      end

      output.puts 'Book list:'
      @books.each.with_index(1) do |book, position|
        output.puts "[#{position}] #{book.title_and_author}"
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ require 'mechanize'
2
+
module Klipbook
  module Sources
    module AmazonSite
      # Extracts books and their clippings from a highlights page.
      #
      # NOTE(review): the page/element arguments are presumably Mechanize /
      # Nokogiri objects (the file requires mechanize); only +search+,
      # +xpath+, +attribute+, +text+ and +value+ are relied on here.
      class BookScraper

        # Builds one Klipbook::Book per book header element found on +page+.
        #
        # @param page object responding to +search+
        # @return [Array<Klipbook::Book>]
        def scrape_book(page)
          page.search(".//div[@class='bookMain yourHighlightsHeader']").map { |element| build_book(page, element) }
        end

        private

        # Populates a book from its header element; the clippings are taken
        # from the whole page.
        def build_book(page, element)
          Klipbook::Book.new do |b|
            b.asin = element.attribute("id").value.gsub(/_[0-9]+$/, "")
            b.author = element.xpath("span[@class='author']").text.gsub("\n", "").gsub(" by ", "").strip
            b.title = element.xpath("span/a").text
            b.last_update = extract_last_update(element.xpath("div[@class='lastHighlighted']").text)
            b.clippings = scrape_clippings(page)
          end
        end

        # Parses the "Last annotated on <date>" text.
        #
        # @return [DateTime, nil] nil when the text is blank or is not a
        #   parseable date, so one odd header does not abort the whole scrape
        def extract_last_update(text)
          date_text = text.to_s.gsub('Last annotated on ', '').strip
          return nil if date_text.empty?

          DateTime.parse(date_text)
        rescue ArgumentError
          nil
        end

        # Collects every clipping on the page; a highlight that carries a note
        # contributes two entries (see build_clipping).
        def scrape_clippings(page)
          page.search(".//div[@class='highlightRow yourHighlight']").flat_map { |element| build_clipping(element) }
        end

        # Builds the clipping(s) for one highlight row.
        #
        # @return [Klipbook::Clipping, Array<Klipbook::Clipping>] the highlight
        #   alone, or [highlight, note] when the row also has note text
        def build_clipping(element)
          location = extract_location(element)
          annotation_id = attribute_value(element.xpath("form/input[@id='annotation_id']"), "value")
          note_text = element.xpath("p/span[@class='noteContent']").text

          highlight = Klipbook::Clipping.new do |c|
            c.annotation_id = annotation_id
            c.text = element.xpath("span[@class='highlight']").text
            c.type = :highlight
            c.location = location
          end

          return highlight if note_text.blank?

          note = Klipbook::Clipping.new do |c|
            c.annotation_id = annotation_id
            c.text = note_text
            c.type = :note
            c.location = location
          end

          [highlight, note]
        end

        # Location number taken from the row's "read more" link, or 0 when the
        # link is missing or its href carries no trailing location=<digits>.
        def extract_location(element)
          href = attribute_value(element.xpath("a[@class='k4pcReadMore readMore linkOut']"), "href")
          match = /location=([0-9]+)$/.match(href.to_s)
          match ? match[1].to_i : 0
        end

        # Value of attribute +name+ on +nodes+, or nil when the attribute
        # lookup yields nothing (instead of failing on nil.value).
        def attribute_value(nodes, name)
          attribute = nodes.attribute(name)
          attribute && attribute.value
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require 'mechanize'
2
+
module Klipbook
  module Sources
    module AmazonSite
      # Logs into the Amazon Kindle site and scrapes books from the
      # "Your Highlights" pages, following the "next book" link from page to
      # page.
      class Scraper
        # @param username [String] value entered in the sign-in form's email field
        # @param password [String] value entered in the sign-in form's password field
        # @param max_books [Integer] maximum number of pages visited
        # @param book_scraper [#scrape_book] extracts the books from one page
        # @param message_stream [#puts, #print] receives progress output
        def initialize(username, password, max_books,
                       book_scraper=Klipbook::Sources::AmazonSite::BookScraper.new,
                       message_stream=$stdout)
          @username = username
          @password = password
          @max_books = max_books
          @message_stream = message_stream
          @agent = Mechanize.new
          @book_scraper = book_scraper
        end

        # Books scraped from the site; fetched on first use and cached.
        def books
          @books ||= fetch_books
        end

        private

        # Signs in, opens the highlights page and scrapes it.
        #
        # @raise [RuntimeError] when sign-in fails or the highlights link
        #   cannot be found
        def fetch_books
          signin_submission = @agent.submit(login)

          raise 'Invalid Username or password' unless signin_submission.title == "Amazon Kindle: Home"

          highlights_link = signin_submission.link_with(:text => /Your Highlights/)
          # Fail with a readable message rather than an error from clicking nil.
          raise 'Could not find the "Your Highlights" link on the Amazon Kindle site' unless highlights_link

          scrape_books(@agent.click(highlights_link))
        end

        # Fetches the sign-in page and returns its form with the credentials
        # filled in (not yet submitted).
        #
        # @raise [RuntimeError] when the page cannot be fetched or contains no
        #   'signIn' form
        def login
          @message_stream.puts 'Logging into site'

          begin
            page = @agent.get("https://www.amazon.com/ap/signin?openid.return_to=https%3A%2F%2Fkindle.amazon.com%3A443%2Fauthenticate%2Flogin_callback%3Fwctx%3D%252F&pageId=amzn_kindle&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.pape.max_auth_age=0&openid.assoc_handle=amzn_kindle&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select")
          rescue StandardError
            raise 'Could not connect to Amazon Kindle Site'
          end

          login_form = page.form('signIn')
          raise 'Could not find the sign-in form on the Amazon Kindle site' unless login_form

          login_form.email = @username
          login_form.password = @password

          login_form
        end

        # Visits up to @max_books pages starting at +page+, printing one dot
        # per page, and returns all scraped books as a flat array. All
        # progress output goes to the injected message stream.
        def scrape_books(page)
          books = []
          @message_stream.print 'Fetching books '

          @max_books.times do
            @message_stream.print '.'
            books << @book_scraper.scrape_book(page)
            page = get_next_page(page)
            break unless page
          end

          @message_stream.puts ' Done!'

          books.flatten
        end

        # Follows the page's "next book" link.
        #
        # @return the next page, or nil when there is no link (or it has no href)
        def get_next_page(page)
          next_link = page.search(".//a[@id='nextBookLink']").first
          href = next_link && next_link.attribute("href")
          return nil unless href

          @agent.get("https://kindle.amazon.com" + href.value)
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
module Klipbook
  module Sources
    module KindleDevice
      # Plain value holder for one clippings entry. Every attribute is a
      # read/write accessor that starts out as nil.
      class Entry
        attr_accessor :title, :author, :type, :location, :page, :added_on, :text

        # Builds an empty entry and yields it to the block (when one is given)
        # so the caller can assign attributes in place.
        def initialize
          yield self if block_given?
        end
      end
    end
  end
end