klipbook 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. data/Gemfile +6 -5
  2. data/Gemfile.lock +62 -25
  3. data/Guardfile +1 -2
  4. data/README.md +66 -18
  5. data/Rakefile +5 -1
  6. data/bin/klipbook +85 -1
  7. data/example.png +0 -0
  8. data/features/collate.feature +51 -0
  9. data/features/fixtures/clippings-for-three-books.txt +105 -0
  10. data/features/list.feature +31 -0
  11. data/features/step_definitions/collate_steps.rb +61 -0
  12. data/features/step_definitions/list_steps.rb +15 -0
  13. data/features/support/env.rb +5 -1
  14. data/klipbook.gemspec +49 -32
  15. data/lib/klipbook/book.rb +18 -0
  16. data/lib/klipbook/clipping.rb +4 -10
  17. data/lib/klipbook/collator.rb +17 -0
  18. data/lib/klipbook/config.rb +22 -0
  19. data/lib/klipbook/fetcher.rb +29 -0
  20. data/lib/klipbook/invalid_source_error.rb +12 -0
  21. data/lib/klipbook/output/book_helpers.rb +12 -0
  22. data/lib/klipbook/{book_summary.erb → output/html_book_summary.erb} +65 -11
  23. data/lib/klipbook/output/html_summary_writer.rb +42 -0
  24. data/lib/klipbook/printer.rb +18 -0
  25. data/lib/klipbook/sources/amazon_site/book_scraper.rb +67 -0
  26. data/lib/klipbook/sources/amazon_site/scraper.rb +78 -0
  27. data/lib/klipbook/sources/kindle_device/entry.rb +11 -0
  28. data/lib/klipbook/sources/kindle_device/entry_parser.rb +85 -0
  29. data/lib/klipbook/sources/kindle_device/file.rb +57 -0
  30. data/lib/klipbook/sources/kindle_device/file_parser.rb +33 -0
  31. data/lib/klipbook/version.rb +1 -1
  32. data/lib/klipbook.rb +18 -5
  33. data/spec/lib/klipbook/book_spec.rb +33 -0
  34. data/spec/lib/klipbook/collator_spec.rb +40 -0
  35. data/spec/lib/klipbook/fetcher_spec.rb +81 -0
  36. data/spec/lib/klipbook/output/html_summary_writer_spec.rb +90 -0
  37. data/spec/lib/klipbook/printer_spec.rb +45 -0
  38. data/spec/lib/klipbook/sources/kindle_device/entry_parser_spec.rb +275 -0
  39. data/spec/lib/klipbook/sources/kindle_device/file_parser_spec.rb +68 -0
  40. data/spec/lib/klipbook/sources/kindle_device/file_spec.rb +163 -0
  41. metadata +158 -58
  42. data/features/list_books.feature +0 -23
  43. data/features/print_book_summary.feature +0 -10
  44. data/features/step_definitions/klipbook_steps.rb +0 -87
  45. data/lib/klipbook/book_summary.rb +0 -35
  46. data/lib/klipbook/cli.rb +0 -49
  47. data/lib/klipbook/clippings_file.rb +0 -50
  48. data/lib/klipbook/clippings_parser.rb +0 -98
  49. data/lib/klipbook/runner.rb +0 -29
  50. data/spec/lib/klipbook/book_summary_spec.rb +0 -30
  51. data/spec/lib/klipbook/clipping_spec.rb +0 -17
  52. data/spec/lib/klipbook/clippings_file_spec.rb +0 -60
  53. data/spec/lib/klipbook/clippings_parser_spec.rb +0 -367
  54. data/spec/lib/klipbook/runner_spec.rb +0 -87
@@ -1,8 +1,12 @@
1
1
  require 'rubygems'
2
2
  require 'bundler/setup'
3
3
 
4
+ require 'aruba/cucumber'
5
+
4
6
  $LOAD_PATH << File.expand_path('../../../lib', __FILE__)
5
7
  require 'klipbook'
6
8
  require 'fileutils'
7
9
 
8
- TEST_DIR = File.expand_path('../../../tmp/test', __FILE__)
10
+ Before('@slow') do
11
+ @aruba_io_wait_seconds = 40
12
+ end
data/klipbook.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "klipbook"
8
- s.version = "0.3.0"
8
+ s.version = "1.0.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ray Grasso"]
12
- s.date = "2011-12-29"
12
+ s.date = "2012-09-08"
13
13
  s.description = "Process your Kindle clippings file to generate a nicely formatted compilation of the clippings of the books you've read"
14
14
  s.email = "ray.grasso@gmail.com"
15
15
  s.executables = ["klipbook"]
@@ -29,26 +29,40 @@ Gem::Specification.new do |s|
29
29
  "Rakefile",
30
30
  "bin/klipbook",
31
31
  "example.png",
32
- "features/list_books.feature",
33
- "features/print_book_summary.feature",
34
- "features/step_definitions/klipbook_steps.rb",
32
+ "features/collate.feature",
33
+ "features/fixtures/clippings-for-three-books.txt",
34
+ "features/list.feature",
35
+ "features/step_definitions/collate_steps.rb",
36
+ "features/step_definitions/list_steps.rb",
35
37
  "features/support/env.rb",
36
38
  "klipbook.gemspec",
37
39
  "lib/klipbook.rb",
38
40
  "lib/klipbook/blank.rb",
39
- "lib/klipbook/book_summary.erb",
40
- "lib/klipbook/book_summary.rb",
41
- "lib/klipbook/cli.rb",
41
+ "lib/klipbook/book.rb",
42
42
  "lib/klipbook/clipping.rb",
43
- "lib/klipbook/clippings_file.rb",
44
- "lib/klipbook/clippings_parser.rb",
45
- "lib/klipbook/runner.rb",
43
+ "lib/klipbook/collator.rb",
44
+ "lib/klipbook/config.rb",
45
+ "lib/klipbook/fetcher.rb",
46
+ "lib/klipbook/invalid_source_error.rb",
47
+ "lib/klipbook/output/book_helpers.rb",
48
+ "lib/klipbook/output/html_book_summary.erb",
49
+ "lib/klipbook/output/html_summary_writer.rb",
50
+ "lib/klipbook/printer.rb",
51
+ "lib/klipbook/sources/amazon_site/book_scraper.rb",
52
+ "lib/klipbook/sources/amazon_site/scraper.rb",
53
+ "lib/klipbook/sources/kindle_device/entry.rb",
54
+ "lib/klipbook/sources/kindle_device/entry_parser.rb",
55
+ "lib/klipbook/sources/kindle_device/file.rb",
56
+ "lib/klipbook/sources/kindle_device/file_parser.rb",
46
57
  "lib/klipbook/version.rb",
47
- "spec/lib/klipbook/book_summary_spec.rb",
48
- "spec/lib/klipbook/clipping_spec.rb",
49
- "spec/lib/klipbook/clippings_file_spec.rb",
50
- "spec/lib/klipbook/clippings_parser_spec.rb",
51
- "spec/lib/klipbook/runner_spec.rb",
58
+ "spec/lib/klipbook/book_spec.rb",
59
+ "spec/lib/klipbook/collator_spec.rb",
60
+ "spec/lib/klipbook/fetcher_spec.rb",
61
+ "spec/lib/klipbook/output/html_summary_writer_spec.rb",
62
+ "spec/lib/klipbook/printer_spec.rb",
63
+ "spec/lib/klipbook/sources/kindle_device/entry_parser_spec.rb",
64
+ "spec/lib/klipbook/sources/kindle_device/file_parser_spec.rb",
65
+ "spec/lib/klipbook/sources/kindle_device/file_spec.rb",
52
66
  "spec/spec_helper.rb",
53
67
  "spec/support/rspec2.rb",
54
68
  "spec/support/with_rr.rb"
@@ -56,58 +70,61 @@ Gem::Specification.new do |s|
56
70
  s.homepage = "https://github.com/grassdog/klipbook"
57
71
  s.licenses = ["MIT"]
58
72
  s.require_paths = ["lib"]
59
- s.rubygems_version = "1.8.10"
73
+ s.rubygems_version = "1.8.23"
60
74
  s.summary = "Klipbook creates a nice html summary of the clippings you've created on your Kindle."
61
75
 
62
76
  if s.respond_to? :specification_version then
63
77
  s.specification_version = 3
64
78
 
65
79
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
66
- s.add_runtime_dependency(%q<thor>, [">= 0"])
80
+ s.add_runtime_dependency(%q<gli>, [">= 0"])
81
+ s.add_runtime_dependency(%q<mechanize>, [">= 0"])
82
+ s.add_runtime_dependency(%q<rainbow>, [">= 0"])
67
83
  s.add_development_dependency(%q<rspec>, [">= 0"])
68
84
  s.add_development_dependency(%q<rr>, [">= 0"])
69
85
  s.add_development_dependency(%q<bundler>, [">= 0"])
70
86
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
71
87
  s.add_development_dependency(%q<rcov>, [">= 0"])
88
+ s.add_development_dependency(%q<pry>, [">= 0"])
72
89
  s.add_development_dependency(%q<cucumber>, [">= 0"])
90
+ s.add_development_dependency(%q<aruba>, [">= 0"])
73
91
  s.add_development_dependency(%q<guard>, [">= 0"])
74
92
  s.add_development_dependency(%q<guard-rspec>, [">= 0"])
75
93
  s.add_development_dependency(%q<guard-cucumber>, [">= 0"])
76
- s.add_development_dependency(%q<rb-inotify>, [">= 0"])
77
- s.add_development_dependency(%q<rb-fsevent>, [">= 0"])
78
- s.add_development_dependency(%q<rb-fchange>, [">= 0"])
79
- s.add_development_dependency(%q<growl_notify>, [">= 0"])
94
+ s.add_development_dependency(%q<terminal-notifier-guard>, [">= 0"])
80
95
  else
81
- s.add_dependency(%q<thor>, [">= 0"])
96
+ s.add_dependency(%q<gli>, [">= 0"])
97
+ s.add_dependency(%q<mechanize>, [">= 0"])
98
+ s.add_dependency(%q<rainbow>, [">= 0"])
82
99
  s.add_dependency(%q<rspec>, [">= 0"])
83
100
  s.add_dependency(%q<rr>, [">= 0"])
84
101
  s.add_dependency(%q<bundler>, [">= 0"])
85
102
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
86
103
  s.add_dependency(%q<rcov>, [">= 0"])
104
+ s.add_dependency(%q<pry>, [">= 0"])
87
105
  s.add_dependency(%q<cucumber>, [">= 0"])
106
+ s.add_dependency(%q<aruba>, [">= 0"])
88
107
  s.add_dependency(%q<guard>, [">= 0"])
89
108
  s.add_dependency(%q<guard-rspec>, [">= 0"])
90
109
  s.add_dependency(%q<guard-cucumber>, [">= 0"])
91
- s.add_dependency(%q<rb-inotify>, [">= 0"])
92
- s.add_dependency(%q<rb-fsevent>, [">= 0"])
93
- s.add_dependency(%q<rb-fchange>, [">= 0"])
94
- s.add_dependency(%q<growl_notify>, [">= 0"])
110
+ s.add_dependency(%q<terminal-notifier-guard>, [">= 0"])
95
111
  end
96
112
  else
97
- s.add_dependency(%q<thor>, [">= 0"])
113
+ s.add_dependency(%q<gli>, [">= 0"])
114
+ s.add_dependency(%q<mechanize>, [">= 0"])
115
+ s.add_dependency(%q<rainbow>, [">= 0"])
98
116
  s.add_dependency(%q<rspec>, [">= 0"])
99
117
  s.add_dependency(%q<rr>, [">= 0"])
100
118
  s.add_dependency(%q<bundler>, [">= 0"])
101
119
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
102
120
  s.add_dependency(%q<rcov>, [">= 0"])
121
+ s.add_dependency(%q<pry>, [">= 0"])
103
122
  s.add_dependency(%q<cucumber>, [">= 0"])
123
+ s.add_dependency(%q<aruba>, [">= 0"])
104
124
  s.add_dependency(%q<guard>, [">= 0"])
105
125
  s.add_dependency(%q<guard-rspec>, [">= 0"])
106
126
  s.add_dependency(%q<guard-cucumber>, [">= 0"])
107
- s.add_dependency(%q<rb-inotify>, [">= 0"])
108
- s.add_dependency(%q<rb-fsevent>, [">= 0"])
109
- s.add_dependency(%q<rb-fchange>, [">= 0"])
110
- s.add_dependency(%q<growl_notify>, [">= 0"])
127
+ s.add_dependency(%q<terminal-notifier-guard>, [">= 0"])
111
128
  end
112
129
  end
113
130
 
@@ -0,0 +1,18 @@
1
+ module Klipbook
2
+ class Book
3
+ attr_accessor :asin, :author, :title, :last_update, :clippings
4
+
5
+ def initialize
6
+ yield self if block_given?
7
+ end
8
+
9
+ def title_and_author
10
+ author_txt = author ? " by #{author}" : ''
11
+ "#{title}#{author_txt}"
12
+ end
13
+
14
+ def get_binding
15
+ binding
16
+ end
17
+ end
18
+ end
@@ -1,15 +1,9 @@
1
- require 'ostruct'
2
- require 'date'
3
-
4
1
  module Klipbook
5
- class Clipping < OpenStruct
6
- def initialize(attributes)
7
- super(attributes)
8
- self.added_on = DateTime.strptime(self.added_on, '%A, %B %d, %Y, %I:%M %p') if self.added_on
9
- end
2
+ class Clipping
3
+ attr_accessor :annotation_id, :text, :location, :type, :page
10
4
 
11
- def <=>(other)
12
- (self.location || 0) <=> (other.location || 0)
5
+ def initialize
6
+ yield self if block_given?
13
7
  end
14
8
  end
15
9
  end
@@ -0,0 +1,17 @@
1
+ module Klipbook
2
+ class Collator
3
+ def initialize(books, summary_writer=Klipbook::Output::HtmlSummaryWriter.new)
4
+ @books = books
5
+ @summary_writer = summary_writer
6
+ end
7
+
8
+ def collate_books(output_dir, force, message_stream=$stdout)
9
+ message_stream.puts "Using output directory: #{output_dir}"
10
+
11
+ @books.each do |book|
12
+ @summary_writer.write(book, output_dir, force)
13
+ end
14
+ end
15
+ end
16
+ end
17
+
@@ -0,0 +1,22 @@
1
+ module Klipbook
2
+ class Config
3
+ def initialize(config_file_name='.klipbookrc')
4
+ @config_file_name = config_file_name
5
+ end
6
+
7
+ def read
8
+ merge_config_from_rc_file({})
9
+ end
10
+
11
+ def merge_config_from_rc_file(config)
12
+ config_file = File.join(File.expand_path(ENV['HOME']), @config_file_name)
13
+
14
+ if config_file && File.exist?(config_file)
15
+ require 'yaml'
16
+ config.merge!(File.open(config_file) { |file| YAML::load(file) })
17
+ end
18
+
19
+ config
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,29 @@
1
+ module Klipbook
2
+ class Fetcher
3
+ def initialize(source_spec, max_books)
4
+ raise InvalidSourceError unless valid_source(source_spec)
5
+
6
+ if (source_spec =~ /file:(.+)/)
7
+ raw_file = File.open($1, 'r')
8
+ @source = Klipbook::Sources::KindleDevice::File.new(raw_file.read.strip, max_books)
9
+ elsif (source_spec =~ /site:(.+):(.+)/)
10
+ username = $1
11
+ password = $2
12
+ @source = Klipbook::Sources::AmazonSite::Scraper.new(username, password, max_books)
13
+ else
14
+ raise InvalidSourceError("Unrecognised source type. Only 'file' and 'site' are supported")
15
+ end
16
+ end
17
+
18
+ def fetch_books
19
+ @source.books
20
+ end
21
+
22
+ private
23
+
24
+ def valid_source(source_spec)
25
+ source_spec =~ /(file:|site:.+:.+)/
26
+ end
27
+ end
28
+ end
29
+
@@ -0,0 +1,12 @@
1
+ class InvalidSourceError < RuntimeError
2
+
3
+ DEFAULT_MESSAGE = "Please provide a valid source.\n" +
4
+ "e.g.\n" +
5
+ " file:path/to/my-clippings-file.txt\n" +
6
+ " site:my-kindle-user@blah.com:my-kindle-password"
7
+
8
+ def initialize(msg = DEFAULT_MESSAGE)
9
+ super
10
+ end
11
+ end
12
+
@@ -0,0 +1,12 @@
1
+ module Klipbook::Output
2
+ module BookHelpers
3
+ def location_html(location)
4
+ if self.asin
5
+ "<a href=\"kindle://book?action=open&asin=#{asin}&location=#{location}\">loc #{location}</a>"
6
+ else
7
+ "loc #{location}"
8
+ end
9
+ end
10
+ end
11
+ end
12
+
@@ -3,7 +3,7 @@
3
3
  <head>
4
4
  <meta charset="utf-8"/>
5
5
  <meta name="generator" content="Klipbook"/>
6
- <title><%= @clippings.first.title %> - Collated Kindle Clippings</title>
6
+ <title><%= title %> - Collated Kindle Clippings</title>
7
7
  <style type="text/css">
8
8
  body {
9
9
  color: #333;
@@ -26,18 +26,44 @@
26
26
 
27
27
  h1 {
28
28
  line-height: 1.1em;
29
+ font-weight: bold;
30
+ }
31
+
32
+ .byline {
33
+ width: 730px;
29
34
  }
30
35
 
31
36
  h2 {
37
+ float: left;
32
38
  line-height: 1.1em;
33
39
  font-size: 1.3em;
34
40
  color: #555;
41
+ font-weight: bold;
35
42
  font-style: italic;
36
43
  }
37
44
 
45
+ h3 {
46
+ float: right;
47
+ font-style: italic;
48
+ font-weight: normal;
49
+ color: #555;
50
+ font-size: 1.1em;
51
+ }
52
+
53
+ .clearfix:before,
54
+ .clearfix:after {
55
+ content: " ";
56
+ display: table;
57
+ }
58
+
59
+ .clearfix:after {
60
+ clear: both;
61
+ }
62
+
38
63
  ul {
39
64
  margin-top: 2em;
40
- width: 43em;
65
+ width: 690px;
66
+ padding-left: 40px;
41
67
  }
42
68
 
43
69
  ul li {
@@ -51,8 +77,8 @@
51
77
 
52
78
  ul li footer {
53
79
  text-align: right;
54
- font-size: .85em;
55
80
  color: #8C8C8C;
81
+ margin-left: 20em;
56
82
  }
57
83
 
58
84
  li.note p {
@@ -60,24 +86,53 @@
60
86
  }
61
87
 
62
88
  footer {
63
- font-size: .85em;
64
- margin-left: 20em;
89
+ font-size: .8em;
65
90
  }
66
91
 
67
92
  footer span {
68
93
  font-style: italic;
69
94
  }
95
+
96
+ footer a {
97
+ text-decoration: underline;
98
+ color: #8C8C8C;
99
+ }
100
+
101
+ footer a, footer a:link, footer a:visited {
102
+ color: #8C8C8C;
103
+ }
104
+
105
+ footer a:hover {
106
+ color: #084ab7;
107
+ }
108
+
109
+ body > footer {
110
+ font-size: .8em;
111
+ margin-left: 0;
112
+ width: 690px;
113
+ text-align: center;
114
+ padding-left: 40px;
115
+ }
70
116
  </style>
71
117
  </head>
72
118
  <body>
73
119
 
74
- <h1><%= @clippings.first.title %></h1>
75
- <% unless @author.blank? %>
120
+ <h1><%= title %></h1>
121
+
122
+ <div class="byline">
123
+ <% unless author.blank? %>
76
124
  <h2>by <%= author %></h2>
77
125
  <% end %>
78
126
 
127
+ <% if last_update %>
128
+ <h3>Last updated: <%= last_update.strftime('%e %B %Y') %></h3>
129
+ <% end %>
130
+ </div>
131
+
132
+ <div class="clearfix"></div>
133
+
79
134
  <ul>
80
- <% @clippings.each do |clipping| %>
135
+ <% clippings.each do |clipping| %>
81
136
  <% unless clipping.text.blank? %>
82
137
  <li class="<%= clipping.type %>">
83
138
  <p>
@@ -85,8 +140,7 @@
85
140
  </p>
86
141
  <footer>
87
142
  <%= clipping.type %>
88
- <% if include_pages && clipping.page %> on page <%= clipping.page %><% end %>
89
- <% if clipping.location %> @ loc <%= clipping.location %><% end %>
143
+ <% if clipping.location %> @ <%= location_html(clipping.location) %><% end %>
90
144
  </footer>
91
145
  </li>
92
146
  <% end %>
@@ -94,7 +148,7 @@
94
148
  </ul>
95
149
 
96
150
  <footer>
97
- Generated by <a href="https://github.com/grassdog/klipbook">Klipbook <%= Klipbook::VERSION %></a> on <span><%= DateTime.now.strftime('%e %b %Y at %l:%M %P') %></span>
151
+ <%= clippings.count %> clippings &bull; Generated by <a href="https://github.com/grassdog/klipbook">Klipbook <%= Klipbook::VERSION %></a> on <span><%= DateTime.now.strftime('%e %b %Y at %l:%M %P') %></span>
98
152
  </footer>
99
153
  </body>
100
154
  </html>
@@ -0,0 +1,42 @@
1
+ require 'erb'
2
+
3
+ module Klipbook::Output
4
+ class HtmlSummaryWriter
5
+ def initialize(message_stream=$stdout)
6
+ @message_stream = message_stream
7
+ end
8
+
9
+ def write(book, output_dir, force)
10
+ require 'rainbow'
11
+
12
+ book.extend Klipbook::Output::BookHelpers
13
+
14
+ filename = filename_for_book(book)
15
+ filepath = File.join(output_dir, filename)
16
+
17
+ if !force && File.exists?(filepath)
18
+ @message_stream.puts("Skipping ".foreground(:yellow) + filename)
19
+ return
20
+ end
21
+
22
+ @message_stream.puts("Writing ".foreground(:green) + filename)
23
+ File.open(filepath, 'w') do |f|
24
+ f.write generate_html(book)
25
+ end
26
+ end
27
+
28
+ private
29
+
30
+ def filename_for_book(book)
31
+ "#{book.title_and_author}.html"
32
+ end
33
+
34
+ def generate_html(book)
35
+ ERB.new(template, 0, '%<>').result(book.get_binding)
36
+ end
37
+
38
+ def template
39
+ @template ||= File.read(File.join(File.dirname(__FILE__), 'html_book_summary.erb'))
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,18 @@
1
+ module Klipbook
2
+ class Printer
3
+ def initialize(books)
4
+ @books = books
5
+ end
6
+
7
+ def print(output=$stdout)
8
+ if @books.empty?
9
+ output.puts 'No books available'
10
+ else
11
+ output.puts 'Book list:'
12
+ @books.each_with_index do |book, index|
13
+ output.puts "[#{index + 1}] #{book.title_and_author}"
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,67 @@
1
+ require 'mechanize'
2
+
3
+ module Klipbook::Sources
4
+ module AmazonSite
5
+ class BookScraper
6
+
7
+ def scrape_book(page)
8
+ page.search(".//div[@class='bookMain yourHighlightsHeader']").map { |element| build_book(page, element) }
9
+ end
10
+
11
+ private
12
+
13
+ def build_book(page, element)
14
+ Klipbook::Book.new do |b|
15
+ b.asin = element.attribute("id").value.gsub(/_[0-9]+$/, "")
16
+ b.author = element.xpath("span[@class='author']").text.gsub("\n", "").gsub(" by ", "").strip
17
+ b.title = element.xpath("span/a").text
18
+ b.last_update = extract_last_update(element.xpath("div[@class='lastHighlighted']").text)
19
+ b.clippings = scrape_clippings(page)
20
+ end
21
+ end
22
+
23
+ def extract_last_update(text)
24
+ text = text.gsub('Last annotated on ', '')
25
+ DateTime.parse(text)
26
+ end
27
+
28
+ def scrape_clippings(page)
29
+ page.search(".//div[@class='highlightRow yourHighlight']").map { |element| build_clipping(element) }.flatten
30
+ end
31
+
32
+ def build_clipping(element)
33
+ location = extract_location(element)
34
+ annotation_id = element.xpath("form/input[@id='annotation_id']").attribute("value").value
35
+ note_text = element.xpath("p/span[@class='noteContent']").text
36
+
37
+ highlight = Klipbook::Clipping.new do |c|
38
+ c.annotation_id = annotation_id
39
+ c.text = element.xpath("span[@class='highlight']").text
40
+ c.type = :highlight
41
+ c.location = location
42
+ end
43
+
44
+ if note_text.blank?
45
+ highlight
46
+ else
47
+ note = Klipbook::Clipping.new do |c|
48
+ c.annotation_id = annotation_id
49
+ c.text = note_text
50
+ c.type = :note
51
+ c.location = location
52
+ end
53
+
54
+ [highlight, note]
55
+ end
56
+ end
57
+
58
+ def extract_location(element)
59
+ if element.xpath("a[@class='k4pcReadMore readMore linkOut']").attribute("href").value =~ /location=([0-9]+)$/
60
+ $1.to_i
61
+ else
62
+ 0
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,78 @@
1
+ require 'mechanize'
2
+
3
+ module Klipbook::Sources
4
+ module AmazonSite
5
+ class Scraper
6
+ def initialize(username, password, max_books,
7
+ book_scraper=Klipbook::Sources::AmazonSite::BookScraper.new,
8
+ message_stream=$stdout)
9
+ @username = username
10
+ @password = password
11
+ @max_books = max_books
12
+ @message_stream = message_stream
13
+ @agent = Mechanize.new
14
+ @book_scraper = book_scraper
15
+ end
16
+
17
+ def books
18
+ @books ||= fetch_books
19
+ end
20
+
21
+ private
22
+
23
+ def fetch_books
24
+ login_form = login
25
+
26
+ signin_submission = @agent.submit(login_form)
27
+
28
+ raise 'Invalid Username or password' unless signin_submission.title == "Amazon Kindle: Home"
29
+
30
+ page = @agent.click(signin_submission.link_with(:text => /Your Highlights/))
31
+
32
+ scrape_books(page)
33
+ end
34
+
35
+ def login
36
+ @message_stream.puts 'Logging into site'
37
+
38
+ begin
39
+ page = @agent.get("https://www.amazon.com/ap/signin?openid.return_to=https%3A%2F%2Fkindle.amazon.com%3A443%2Fauthenticate%2Flogin_callback%3Fwctx%3D%252F&pageId=amzn_kindle&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.pape.max_auth_age=0&openid.assoc_handle=amzn_kindle&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select")
40
+ rescue
41
+ raise 'Could not connect to Amazon Kindle Site'
42
+ end
43
+
44
+ login_form = page.form('signIn')
45
+
46
+ login_form.email = @username
47
+ login_form.password = @password
48
+
49
+ login_form
50
+ end
51
+
52
+ def scrape_books(page)
53
+ books = []
54
+ @message_stream.print 'Fetching books '
55
+
56
+ @max_books.times do |count|
57
+ @message_stream.print '.'
58
+ books << @book_scraper.scrape_book(page)
59
+ page = get_next_page(page)
60
+ break unless page
61
+ end
62
+
63
+ puts ' Done!'
64
+
65
+ books.flatten
66
+ end
67
+
68
+ def get_next_page(page)
69
+ ret = page.search(".//a[@id='nextBookLink']").first
70
+ if ret and ret.attribute("href")
71
+ @agent.get("https://kindle.amazon.com" + ret.attribute("href").value)
72
+ else
73
+ nil
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,11 @@
1
+ module Klipbook::Sources
2
+ module KindleDevice
3
+ class Entry
4
+ attr_accessor :title, :author, :type, :location, :page, :added_on, :text
5
+
6
+ def initialize
7
+ yield self if block_given?
8
+ end
9
+ end
10
+ end
11
+ end