bookmeter_scraper 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.ja.md +42 -5
- data/README.md +40 -4
- data/lib/bookmeter_scraper.rb +46 -0
- data/lib/bookmeter_scraper/agent.rb +59 -0
- data/lib/bookmeter_scraper/bookmeter.rb +52 -393
- data/lib/bookmeter_scraper/configuration.rb +16 -5
- data/lib/bookmeter_scraper/scraper.rb +388 -0
- data/lib/bookmeter_scraper/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eed0f25219959cbcb0f1e74a0db32d7f6ef46de8
|
4
|
+
data.tar.gz: bf5981a2fcb2c933c41720cb99846ac8d1df7dad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 894e75e566f6e547089048bf6872917c79dcb2a9456d36afd59dc624bdd62a67b9bdce23cd811e1035408b14bc7eba48928e03337d50fed102206daee899cf5f
|
7
|
+
data.tar.gz: 3077ac2b3b900537f494ed3fe001cb2be7af6a726293945ad738215ea205ac536d53ac3ad70ba1587dfbcedf4d820387845036c31fc80af92445c4ea2ffd9388
|
data/README.ja.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
# Bookmeter Scraper [](https://travis-ci.org/kymmt90/bookmeter_scraper)
|
1
|
+
# Bookmeter Scraper [](https://travis-ci.org/kymmt90/bookmeter_scraper) [](https://badge.fury.io/rb/bookmeter_scraper)
|
2
|
+
|
2
3
|
|
3
4
|
[読書メーター](http://bookmeter.com)の情報をスクレイピングして Ruby で扱えるようにするための gem です。
|
4
5
|
|
@@ -30,10 +31,11 @@ require 'bookmeter_scraper'
|
|
30
31
|
|
31
32
|
書籍情報、お気に入り / お気に入られユーザ情報を取得するには、`Bookmeter.log_in` または `Bookmeter#log_in` でログインしておく必要があります。
|
32
33
|
|
33
|
-
ログイン情報の入力には以下の
|
34
|
+
ログイン情報の入力には以下の 3 通りの方法があります。
|
34
35
|
|
35
36
|
1. 引数として渡す
|
36
37
|
2. `config.yml` へ記述しておく
|
38
|
+
3. ブロック内で設定する
|
37
39
|
|
38
40
|
#### 1. 引数として渡す
|
39
41
|
|
@@ -67,6 +69,28 @@ bookmeter = BookmeterScraper::Bookmeter.log_in
|
|
67
69
|
bookmeter.logged_in? # true
|
68
70
|
```
|
69
71
|
|
72
|
+
#### 3. ブロック内で設定する
|
73
|
+
|
74
|
+
以下のように `Bookmeter.log_in` へブロックを渡すことで、ログインできます。
|
75
|
+
|
76
|
+
```ruby
|
77
|
+
bookmeter = BookmeterScraper::Bookmeter.log_in do |configuration|
|
78
|
+
configuration.mail = 'example@example.com'
|
79
|
+
configuration.password = 'password'
|
80
|
+
end
|
81
|
+
bookmeter.logged_in? # true
|
82
|
+
```
|
83
|
+
|
84
|
+
`Bookmeter#log_in` でもログイン可能です。
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
bookmeter = BookmeterScraper::Bookmeter.new
|
88
|
+
bookmeter.log_in do |configuration|
|
89
|
+
configuration.mail = 'example@example.com'
|
90
|
+
configuration.password = 'password'
|
91
|
+
end
|
92
|
+
```
|
93
|
+
|
70
94
|
### 書籍情報の取得
|
71
95
|
|
72
96
|
以下の書籍情報
|
@@ -76,7 +100,7 @@ bookmeter.logged_in? # true
|
|
76
100
|
- 積読本
|
77
101
|
- 読みたい本
|
78
102
|
|
79
|
-
|
103
|
+
を取得できます。取得には `Bookmeter.log_in` などによる事前のログインが必要です。
|
80
104
|
|
81
105
|
#### 読んだ本
|
82
106
|
|
@@ -92,13 +116,17 @@ bookmeter.read_books('01010101') # 他のユーザの ID を指定して、
|
|
92
116
|
- 書名 `name`
|
93
117
|
- 著者 `author`
|
94
118
|
- 読了日(初読了日と再読日の両方)の配列 `read_dates`
|
119
|
+
- 読書メーター内の書籍ページの URI `uri`
|
120
|
+
- 書籍の表紙画像 URI `image_uri`
|
95
121
|
|
96
|
-
を属性として持つ `
|
122
|
+
を属性として持つ `Book` の配列として取得できます。
|
97
123
|
|
98
124
|
```ruby
|
99
125
|
books[0].name
|
100
126
|
books[0].author
|
101
127
|
books[0].read_dates
|
128
|
+
books[0].uri
|
129
|
+
books[0].image_uri
|
102
130
|
```
|
103
131
|
|
104
132
|
さらに、`Bookmeter#read_books_in` で特定年月の「読んだ本」情報が取得できます。
|
@@ -129,6 +157,8 @@ books = bookmeter.reading_books # ログインユーザの「読んでる本
|
|
129
157
|
books[0].name
|
130
158
|
books[0].author
|
131
159
|
books[0].read_dates # 読了日の Array は空
|
160
|
+
books[0].uri
|
161
|
+
books[0].image_uri
|
132
162
|
|
133
163
|
bookmeter.tsundoku # ログインユーザの「積読本」を取得
|
134
164
|
bookmeter.wish_list # ログインユーザの「読みたい本」を取得
|
@@ -143,13 +173,20 @@ following_users = bookmeter.followings # 「お気に入り」ユーザの情
|
|
143
173
|
followers = bookmeter.followers # 「お気に入られ」ユーザの情報を取得
|
144
174
|
```
|
145
175
|
|
146
|
-
|
176
|
+
ユーザ情報は
|
177
|
+
|
178
|
+
- ユーザ名 `name`
|
179
|
+
- ユーザ ID `id`
|
180
|
+
- 読書メーター内のユーザページの URI `uri`
|
181
|
+
|
182
|
+
を持つ `User` の配列として取得できます。
|
147
183
|
|
148
184
|
```ruby
|
149
185
|
following_users[0].name
|
150
186
|
following_users[0].id
|
151
187
|
followers[0].name
|
152
188
|
followers[0].id
|
189
|
+
followers[0].uri
|
153
190
|
```
|
154
191
|
|
155
192
|
#### 注意
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Bookmeter Scraper [](https://travis-ci.org/kymmt90/bookmeter_scraper)
|
1
|
+
# Bookmeter Scraper [](https://travis-ci.org/kymmt90/bookmeter_scraper) [](https://badge.fury.io/rb/bookmeter_scraper)
|
2
2
|
|
3
3
|
A library for scraping [Bookmeter](http://bookmeter.com).
|
4
4
|
|
@@ -34,10 +34,11 @@ require 'bookmeter_scraper'
|
|
34
34
|
|
35
35
|
You need to log in to Bookmeter with `Bookmeter.log_in` or `Bookmeter#log_in` to get books and followings / followers information.
|
36
36
|
|
37
|
-
There are
|
37
|
+
There are 3 ways to input authentication information:
|
38
38
|
|
39
39
|
1. Passing as arguments
|
40
40
|
2. Writing out to `config.yml`
|
41
|
+
3. Configuring in a block
|
41
42
|
|
42
43
|
#### 1. Passing as arguments
|
43
44
|
|
@@ -71,6 +72,27 @@ bookmeter = BookmeterScraper::Bookmeter.log_in
|
|
71
72
|
bookmeter.logged_in? # true
|
72
73
|
```
|
73
74
|
|
75
|
+
#### 3. Configuring in a block
|
76
|
+
|
77
|
+
You can configure the mail address and password in a block.
|
78
|
+
|
79
|
+
```ruby
|
80
|
+
bookmeter = BookmeterScraper::Bookmeter.log_in do |configuration|
|
81
|
+
configuration.mail = 'example@example.com'
|
82
|
+
configuration.password = 'password'
|
83
|
+
end
|
84
|
+
bookmeter.logged_in? # true
|
85
|
+
```
|
86
|
+
|
87
|
+
`Bookmeter#log_in` is also available:
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
bookmeter = BookmeterScraper::Bookmeter.new
|
91
|
+
bookmeter.log_in do |configuration|
|
92
|
+
configuration.mail = 'example@example.com'
|
93
|
+
configuration.password = 'password'
|
94
|
+
end
|
95
|
+
```
|
74
96
|
|
75
97
|
### Get books information
|
76
98
|
|
@@ -92,12 +114,20 @@ books = bookmeter.read_books # get read books of the logged in user
|
|
92
114
|
bookmeter.read_books('01010101') # get read books of a user specified by ID
|
93
115
|
```
|
94
116
|
|
95
|
-
Books infomation is an array of `
|
117
|
+
Books information is an array of `Book` which has these attributes:
|
118
|
+
|
119
|
+
- `name`
|
120
|
+
- `read_dates`
|
121
|
+
- `uri`
|
122
|
+
- `image_uri`
|
123
|
+
|
96
124
|
`read_dates` is an array of finished reading dates (first finished date and reread dates):
|
97
125
|
|
98
126
|
```ruby
|
99
127
|
books[0].name
|
100
128
|
books[0].read_dates
|
129
|
+
books[0].uri
|
130
|
+
books[0].image_uri
|
101
131
|
```
|
102
132
|
|
103
133
|
To specify year-month for read books, you can use `Bookmeter#read_books_in`:
|
@@ -135,13 +165,19 @@ followers = bookmeter.followers
|
|
135
165
|
|
136
166
|
You need to log in to Bookmeter in advance to get this information.
|
137
167
|
|
138
|
-
Users information is an array of `Struct` which has
|
168
|
+
Users information is an array of `Struct` which has the following attributes:
|
169
|
+
|
170
|
+
- `name`
|
171
|
+
- `id`
|
172
|
+
- `uri`
|
139
173
|
|
140
174
|
```ruby
|
141
175
|
following_users[0].name
|
142
176
|
following_users[0].id
|
177
|
+
following_users[0].uri
|
143
178
|
followers[0].name
|
144
179
|
followers[0].id
|
180
|
+
followers[0].uri
|
145
181
|
```
|
146
182
|
|
147
183
|
#### Notice
|
data/lib/bookmeter_scraper.rb
CHANGED
@@ -1,3 +1,49 @@
|
|
1
1
|
require 'bookmeter_scraper/bookmeter'
|
2
2
|
require 'bookmeter_scraper/configuration'
|
3
3
|
require 'bookmeter_scraper/version'
|
4
|
+
|
5
|
+
module BookmeterScraper
|
6
|
+
ROOT_URI = 'http://bookmeter.com'.freeze
|
7
|
+
LOGIN_URI = "#{ROOT_URI}/login".freeze
|
8
|
+
|
9
|
+
USER_ID_REGEX = /^\d+$/
|
10
|
+
|
11
|
+
class << self
|
12
|
+
def mypage_uri(user_id)
|
13
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
14
|
+
"#{ROOT_URI}/u/#{user_id}"
|
15
|
+
end
|
16
|
+
|
17
|
+
def read_books_uri(user_id)
|
18
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
19
|
+
"#{ROOT_URI}/u/#{user_id}/booklist"
|
20
|
+
end
|
21
|
+
|
22
|
+
def reading_books_uri(user_id)
|
23
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
24
|
+
"#{ROOT_URI}/u/#{user_id}/booklistnow"
|
25
|
+
end
|
26
|
+
|
27
|
+
def tsundoku_uri(user_id)
|
28
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
29
|
+
"#{ROOT_URI}/u/#{user_id}/booklisttun"
|
30
|
+
end
|
31
|
+
|
32
|
+
def wish_list_uri(user_id)
|
33
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
34
|
+
"#{ROOT_URI}/u/#{user_id}/booklistpre"
|
35
|
+
end
|
36
|
+
|
37
|
+
def followings_uri(user_id)
|
38
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
39
|
+
"#{ROOT_URI}/u/#{user_id}/favorite_user"
|
40
|
+
end
|
41
|
+
|
42
|
+
def followers_uri(user_id)
|
43
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
44
|
+
"#{ROOT_URI}/u/#{user_id}/favorited_user"
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
class BookmeterError < StandardError; end
|
49
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module BookmeterScraper
|
4
|
+
class Agent
|
5
|
+
extend Forwardable
|
6
|
+
def_delegator :@agent, :get
|
7
|
+
def_delegator :@agent, :click
|
8
|
+
|
9
|
+
attr_reader :log_in_user_id
|
10
|
+
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@agent = Mechanize.new do |a|
|
14
|
+
a.user_agent_alias = Mechanize::AGENT_ALIASES.keys.reject do |ua_alias|
|
15
|
+
%w(Android iPad iPhone Mechanize).include?(ua_alias)
|
16
|
+
end.sample
|
17
|
+
end
|
18
|
+
@log_in_user_id = nil
|
19
|
+
end
|
20
|
+
|
21
|
+
def log_in(config)
|
22
|
+
raise ArgumentError if config.nil?
|
23
|
+
|
24
|
+
page_after_submitting_form = nil
|
25
|
+
@agent.get(BookmeterScraper::LOGIN_URI) do |page|
|
26
|
+
page_after_submitting_form = page.form_with(action: '/login') do |form|
|
27
|
+
form.field_with(name: 'mail').value = config.mail
|
28
|
+
form.field_with(name: 'password').value = config.password
|
29
|
+
end.submit
|
30
|
+
end
|
31
|
+
|
32
|
+
if page_after_logging_in? page_after_submitting_form
|
33
|
+
mypage = page_after_submitting_form.link_with(text: 'マイページ').click
|
34
|
+
@log_in_user_id = extract_user_id(mypage)
|
35
|
+
else
|
36
|
+
nil
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def logged_in?
|
41
|
+
!@log_in_user_id.nil?
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
private
|
46
|
+
|
47
|
+
def page_after_logging_in?(page)
|
48
|
+
raise ArgumentError if page.nil?
|
49
|
+
|
50
|
+
page.uri.to_s == BookmeterScraper::ROOT_URI + '/'
|
51
|
+
end
|
52
|
+
|
53
|
+
def extract_user_id(page)
|
54
|
+
raise ArgumentError if page.nil?
|
55
|
+
|
56
|
+
page.uri.to_s.match(/\/u\/(\d+)$/)[1]
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -1,130 +1,51 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
require 'yasuri'
|
1
|
+
require 'bookmeter_scraper/agent'
|
2
|
+
require 'bookmeter_scraper/scraper'
|
4
3
|
|
5
4
|
module BookmeterScraper
|
6
5
|
class Bookmeter
|
7
6
|
DEFAULT_CONFIG_PATH = './config.yml'.freeze
|
8
7
|
|
9
|
-
ROOT_URI = 'http://bookmeter.com'.freeze
|
10
|
-
LOGIN_URI = "#{ROOT_URI}/login".freeze
|
11
|
-
|
12
|
-
PROFILE_ATTRIBUTES = %i(name gender age blood_type job address url description first_day elapsed_days read_books_count read_pages_count reviews_count bookshelfs_count)
|
13
|
-
Profile = Struct.new(*PROFILE_ATTRIBUTES)
|
14
|
-
|
15
|
-
BOOK_ATTRIBUTES = %i(name author read_dates)
|
16
|
-
Book = Struct.new(*BOOK_ATTRIBUTES)
|
17
|
-
class Books
|
18
|
-
extend Forwardable
|
19
|
-
|
20
|
-
def_delegator :@books, :[]
|
21
|
-
def_delegator :@books, :[]=
|
22
|
-
def_delegator :@books, :<<
|
23
|
-
def_delegator :@books, :each
|
24
|
-
def_delegator :@books, :flatten!
|
25
|
-
|
26
|
-
def initialize; @books = []; end
|
27
|
-
|
28
|
-
def concat(books)
|
29
|
-
books.each do |book|
|
30
|
-
next if @books.any? { |b| b.name == book.name && b.author == book.author }
|
31
|
-
@books << book
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_a; @books; end
|
36
|
-
end
|
37
|
-
|
38
|
-
USER_ATTRIBUTES = %i(name id)
|
39
|
-
User = Struct.new(*USER_ATTRIBUTES)
|
40
|
-
|
41
|
-
JP_ATTRIBUTE_NAMES = {
|
42
|
-
gender: '性別',
|
43
|
-
age: '年齢',
|
44
|
-
blood_type: '血液型',
|
45
|
-
job: '職業',
|
46
|
-
address: '現住所',
|
47
|
-
url: 'URL / ブログ',
|
48
|
-
description: '自己紹介',
|
49
|
-
first_day: '記録初日',
|
50
|
-
elapsed_days: '経過日数',
|
51
|
-
read_books_count: '読んだ本',
|
52
|
-
read_pages_count: '読んだページ',
|
53
|
-
reviews_count: '感想/レビュー',
|
54
|
-
bookshelfs_count: '本棚',
|
55
|
-
}
|
56
|
-
|
57
|
-
NUM_BOOKS_PER_PAGE = 40
|
58
|
-
NUM_USERS_PER_PAGE = 20
|
59
|
-
|
60
8
|
attr_reader :log_in_user_id
|
61
9
|
|
62
|
-
def self.mypage_uri(user_id)
|
63
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
64
|
-
"#{ROOT_URI}/u/#{user_id}"
|
65
|
-
end
|
66
|
-
|
67
|
-
def self.read_books_uri(user_id)
|
68
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
69
|
-
"#{ROOT_URI}/u/#{user_id}/booklist"
|
70
|
-
end
|
71
|
-
|
72
|
-
def self.reading_books_uri(user_id)
|
73
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
74
|
-
"#{ROOT_URI}/u/#{user_id}/booklistnow"
|
75
|
-
end
|
76
|
-
|
77
|
-
def self.tsundoku_uri(user_id)
|
78
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
79
|
-
"#{ROOT_URI}/u/#{user_id}/booklisttun"
|
80
|
-
end
|
81
|
-
|
82
|
-
def self.wish_list_uri(user_id)
|
83
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
84
|
-
"#{ROOT_URI}/u/#{user_id}/booklistpre"
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.followings_uri(user_id)
|
88
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
89
|
-
"#{ROOT_URI}/u/#{user_id}/favorite_user"
|
90
|
-
end
|
91
|
-
|
92
|
-
def self.followers_uri(user_id)
|
93
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
94
|
-
"#{ROOT_URI}/u/#{user_id}/favorited_user"
|
95
|
-
end
|
96
10
|
|
97
|
-
|
98
|
-
|
99
|
-
|
11
|
+
class << self
|
12
|
+
def log_in(mail = nil, password = nil)
|
13
|
+
Bookmeter.new.tap do |bookmeter|
|
14
|
+
if block_given?
|
15
|
+
config = Configuration.new
|
16
|
+
yield config
|
17
|
+
bookmeter.log_in(config.mail, config.password)
|
18
|
+
else
|
19
|
+
bookmeter.log_in(mail, password)
|
20
|
+
end
|
21
|
+
end
|
100
22
|
end
|
101
23
|
end
|
102
24
|
|
103
25
|
|
104
26
|
def initialize(agent = nil)
|
105
|
-
@agent
|
106
|
-
@
|
27
|
+
@agent = agent.nil? ? Agent.new : agent
|
28
|
+
@scraper = Scraper.new(@agent)
|
29
|
+
@logged_in = false
|
107
30
|
@log_in_user_id = nil
|
108
|
-
@book_pages = {}
|
109
31
|
end
|
110
32
|
|
111
33
|
def log_in(mail = nil, password = nil)
|
112
34
|
raise BookmeterError if @agent.nil?
|
113
35
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
return unless logged_in?
|
36
|
+
configuration = if block_given?
|
37
|
+
Configuration.new.tap { |config| yield config }
|
38
|
+
elsif mail.nil? && password.nil?
|
39
|
+
Configuration.new(DEFAULT_CONFIG_PATH)
|
40
|
+
else
|
41
|
+
Configuration.new.tap do |config|
|
42
|
+
config.mail = mail
|
43
|
+
config.password = password
|
44
|
+
end
|
45
|
+
end
|
125
46
|
|
126
|
-
|
127
|
-
@
|
47
|
+
@log_in_user_id = @agent.log_in(configuration)
|
48
|
+
@logged_in = !@log_in_user_id.nil?
|
128
49
|
end
|
129
50
|
|
130
51
|
def logged_in?
|
@@ -132,321 +53,59 @@ module BookmeterScraper
|
|
132
53
|
end
|
133
54
|
|
134
55
|
def profile(user_id)
|
135
|
-
raise ArgumentError unless user_id =~
|
136
|
-
|
137
|
-
mypage = @agent.get(Bookmeter.mypage_uri(user_id))
|
138
|
-
|
139
|
-
profile_dl_tags = mypage.search('#side_left > div.inner > div.profile > dl')
|
140
|
-
jp_attribute_names = profile_dl_tags.map { |i| i.children[0].children.text }
|
141
|
-
attribute_values = profile_dl_tags.map { |i| i.children[1].children.text }
|
142
|
-
jp_attributes = Hash[jp_attribute_names.zip(attribute_values)]
|
143
|
-
attributes = PROFILE_ATTRIBUTES.map do |attribute|
|
144
|
-
jp_attributes[JP_ATTRIBUTE_NAMES[attribute]]
|
145
|
-
end
|
146
|
-
attributes[0] = mypage.at_css('#side_left > div.inner > h3').text
|
147
|
-
|
148
|
-
Profile.new(*attributes)
|
56
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
57
|
+
@scraper.fetch_profile(user_id)
|
149
58
|
end
|
150
59
|
|
151
60
|
def read_books(user_id = @log_in_user_id)
|
152
|
-
|
153
|
-
|
154
|
-
books.to_a
|
61
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
62
|
+
fetch_books(user_id, :read_books_uri)
|
155
63
|
end
|
156
64
|
|
157
65
|
def read_books_in(year, month, user_id = @log_in_user_id)
|
66
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
67
|
+
|
158
68
|
date = Time.local(year, month)
|
159
|
-
books =
|
69
|
+
books = @scraper.fetch_read_books(user_id, date)
|
160
70
|
books.each { |b| yield b } if block_given?
|
161
71
|
books.to_a
|
162
72
|
end
|
163
73
|
|
164
74
|
def reading_books(user_id = @log_in_user_id)
|
165
|
-
|
166
|
-
|
167
|
-
books.to_a
|
75
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
76
|
+
fetch_books(user_id, :reading_books_uri)
|
168
77
|
end
|
169
78
|
|
170
79
|
def tsundoku(user_id = @log_in_user_id)
|
171
|
-
|
172
|
-
|
173
|
-
books.to_a
|
80
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
81
|
+
fetch_books(user_id, :tsundoku_uri)
|
174
82
|
end
|
175
83
|
|
176
84
|
def wish_list(user_id = @log_in_user_id)
|
177
|
-
|
178
|
-
|
179
|
-
books.to_a
|
85
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
86
|
+
fetch_books(user_id, :wish_list_uri)
|
180
87
|
end
|
181
88
|
|
182
89
|
def followings(user_id = @log_in_user_id)
|
183
|
-
|
90
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
91
|
+
@scraper.fetch_followings(user_id)
|
184
92
|
end
|
185
93
|
|
186
94
|
def followers(user_id = @log_in_user_id)
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
private
|
191
|
-
|
192
|
-
def self.new_agent
|
193
|
-
agent = Mechanize.new do |a|
|
194
|
-
a.user_agent_alias = Mechanize::AGENT_ALIASES.keys.reject do |ua_alias|
|
195
|
-
%w(Android iPad iPhone Mechanize).include?(ua_alias)
|
196
|
-
end.sample
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
def extract_user_id(page)
|
201
|
-
page.uri.to_s.match(/\/u\/(\d+)$/)[1]
|
202
|
-
end
|
203
|
-
|
204
|
-
def get_books(user_id, uri_method)
|
205
|
-
books = Books.new
|
206
|
-
scraped_pages = scrape_book_pages(user_id, uri_method)
|
207
|
-
scraped_pages.each do |page|
|
208
|
-
books << get_book_structs(page)
|
209
|
-
books.flatten!
|
210
|
-
end
|
211
|
-
books
|
95
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
96
|
+
@scraper.fetch_followers(user_id)
|
212
97
|
end
|
213
98
|
|
214
|
-
def get_read_books(user_id, target_ym)
|
215
|
-
result = Books.new
|
216
|
-
scrape_book_pages(user_id, :read_books_uri).each do |page|
|
217
|
-
first_book_date = get_read_date(page['book_1_link'])
|
218
|
-
last_book_date = get_last_book_date(page)
|
219
|
-
|
220
|
-
first_book_ym = Time.local(first_book_date['year'].to_i, first_book_date['month'].to_i)
|
221
|
-
last_book_ym = Time.local(last_book_date['year'].to_i, last_book_date['month'].to_i)
|
222
|
-
|
223
|
-
if target_ym < last_book_ym
|
224
|
-
next
|
225
|
-
elsif target_ym == first_book_ym && target_ym > last_book_ym
|
226
|
-
result.concat(get_target_books(target_ym, page))
|
227
|
-
break
|
228
|
-
elsif target_ym < first_book_ym && target_ym > last_book_ym
|
229
|
-
result.concat(get_target_books(target_ym, page))
|
230
|
-
break
|
231
|
-
elsif target_ym <= first_book_ym && target_ym >= last_book_ym
|
232
|
-
result.concat(get_target_books(target_ym, page))
|
233
|
-
elsif target_ym > first_book_ym
|
234
|
-
break
|
235
|
-
end
|
236
|
-
end
|
237
|
-
result
|
238
|
-
end
|
239
|
-
|
240
|
-
def get_last_book_date(page)
|
241
|
-
NUM_BOOKS_PER_PAGE.downto(1) do |i|
|
242
|
-
link = page["book_#{i}_link"]
|
243
|
-
next if link.empty?
|
244
|
-
return get_read_date(link)
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
def get_target_books(target_ym, page)
|
249
|
-
target_books = Books.new
|
250
|
-
|
251
|
-
1.upto(NUM_BOOKS_PER_PAGE) do |i|
|
252
|
-
next if page["book_#{i}_link"].empty?
|
253
|
-
|
254
|
-
read_yms = []
|
255
|
-
read_date = get_read_date(page["book_#{i}_link"])
|
256
|
-
read_dates = [Time.local(read_date['year'], read_date['month'], read_date['day'])]
|
257
|
-
read_yms << Time.local(read_date['year'], read_date['month'])
|
258
|
-
|
259
|
-
reread_dates = []
|
260
|
-
reread_dates << get_reread_date(page["book_#{i}_link"])
|
261
|
-
reread_dates.flatten!
|
262
|
-
|
263
|
-
unless reread_dates.empty?
|
264
|
-
reread_dates.each do |date|
|
265
|
-
read_yms << Time.local(date['reread_year'], date['reread_month'])
|
266
|
-
end
|
267
|
-
end
|
268
99
|
|
269
|
-
|
270
|
-
|
271
|
-
unless reread_dates.empty?
|
272
|
-
reread_dates.each do |date|
|
273
|
-
read_dates << Time.local(date['reread_year'], date['reread_month'], date['reread_day'])
|
274
|
-
end
|
275
|
-
end
|
276
|
-
book_name = get_book_name(page["book_#{i}_link"])
|
277
|
-
book_author = get_book_author(page["book_#{i}_link"])
|
278
|
-
book = Book.new(book_name, book_author, read_dates)
|
279
|
-
target_books << book
|
280
|
-
end
|
281
|
-
|
282
|
-
target_books
|
283
|
-
end
|
284
|
-
|
285
|
-
def scrape_book_pages(user_id, uri_method)
|
286
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
287
|
-
raise ArgumentError unless Bookmeter.methods.include?(uri_method)
|
288
|
-
return [] unless logged_in?
|
289
|
-
|
290
|
-
books_page = @agent.get(Bookmeter.method(uri_method).call(user_id))
|
291
|
-
|
292
|
-
# if books are not found at all
|
293
|
-
return [] if books_page.search('#main_left > div > center > a').empty?
|
294
|
-
|
295
|
-
if books_page.search('span.now_page').empty?
|
296
|
-
books_root = Yasuri.struct_books '//*[@id="main_left"]/div' do
|
297
|
-
1.upto(NUM_BOOKS_PER_PAGE) do |i|
|
298
|
-
send("text_book_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i + 1}]/div[2]/a")
|
299
|
-
send("text_book_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i + 1}]/div[2]/a/@href")
|
300
|
-
end
|
301
|
-
end
|
302
|
-
return [books_root.inject(@agent, books_page)]
|
303
|
-
end
|
304
|
-
|
305
|
-
books_root = Yasuri.pages_root '//span[@class="now_page"]/following-sibling::span[1]/a' do
|
306
|
-
text_page_index '//span[@class="now_page"]/a'
|
307
|
-
1.upto(NUM_BOOKS_PER_PAGE) do |i|
|
308
|
-
send("text_book_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i + 1}]/div[2]/a")
|
309
|
-
send("text_book_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i + 1}]/div[2]/a/@href")
|
310
|
-
end
|
311
|
-
end
|
312
|
-
books_root.inject(@agent, books_page)
|
313
|
-
end
|
314
|
-
|
315
|
-
def get_book_page(book_uri)
|
316
|
-
@book_pages[book_uri] = @agent.get(ROOT_URI + book_uri) unless @book_pages[book_uri]
|
317
|
-
@book_pages[book_uri]
|
318
|
-
end
|
319
|
-
|
320
|
-
def get_book_name(book_uri)
|
321
|
-
get_book_page(book_uri).search('#title').text
|
322
|
-
end
|
323
|
-
|
324
|
-
def get_book_author(book_uri)
|
325
|
-
get_book_page(book_uri).search('#author_name').text
|
326
|
-
end
|
327
|
-
|
328
|
-
def get_read_date(book_uri)
|
329
|
-
book_date = Yasuri.struct_date '//*[@id="book_edit_area"]/form[1]/div[2]' do
|
330
|
-
text_year '//*[@id="read_date_y"]/option[1]', truncate: /\d+/, proc: :to_i
|
331
|
-
text_month '//*[@id="read_date_m"]/option[1]', truncate: /\d+/, proc: :to_i
|
332
|
-
text_day '//*[@id="read_date_d"]/option[1]', truncate: /\d+/, proc: :to_i
|
333
|
-
end
|
334
|
-
book_date.inject(@agent, get_book_page(book_uri))
|
335
|
-
end
|
336
|
-
|
337
|
-
def get_reread_date(book_uri)
|
338
|
-
book_reread_date = Yasuri.struct_reread_date '//*[@id="book_edit_area"]/div/form[1]/div[2]' do
|
339
|
-
text_reread_year '//div[@class="reread_box"]/form[1]/div[2]/select[1]/option[1]', truncate: /\d+/, proc: :to_i
|
340
|
-
text_reread_month '//div[@class="reread_box"]/form[1]/div[2]/select[2]/option[1]', truncate: /\d+/, proc: :to_i
|
341
|
-
text_reread_day '//div[@class="reread_box"]/form[1]/div[2]/select[3]/option[1]', truncate: /\d+/, proc: :to_i
|
342
|
-
end
|
343
|
-
book_reread_date.inject(@agent, get_book_page(book_uri))
|
344
|
-
end
|
345
|
-
|
346
|
-
def get_book_structs(page)
|
347
|
-
books = []
|
348
|
-
|
349
|
-
1.upto(NUM_BOOKS_PER_PAGE) do |i|
|
350
|
-
break if page["book_#{i}_link"].empty?
|
351
|
-
|
352
|
-
read_dates = []
|
353
|
-
read_date = get_read_date(page["book_#{i}_link"])
|
354
|
-
unless read_date.empty?
|
355
|
-
read_dates << Time.local(read_date['year'], read_date['month'], read_date['day'])
|
356
|
-
end
|
357
|
-
|
358
|
-
reread_dates = []
|
359
|
-
reread_dates << get_reread_date(page["book_#{i}_link"])
|
360
|
-
reread_dates.flatten!
|
361
|
-
|
362
|
-
unless reread_dates.empty?
|
363
|
-
reread_dates.each do |date|
|
364
|
-
read_dates << Time.local(date['reread_year'], date['reread_month'], date['reread_day'])
|
365
|
-
end
|
366
|
-
end
|
367
|
-
|
368
|
-
book_name = get_book_name(page["book_#{i}_link"])
|
369
|
-
book_author = get_book_author(page["book_#{i}_link"])
|
370
|
-
book = Book.new(book_name, book_author, read_dates)
|
371
|
-
books << book
|
372
|
-
end
|
373
|
-
|
374
|
-
books
|
375
|
-
end
|
376
|
-
|
377
|
-
def get_followings(user_id)
|
378
|
-
users = []
|
379
|
-
scraped_pages = user_id == @log_in_user_id ? scrape_followings_page(user_id)
|
380
|
-
: scrape_others_followings_page(user_id)
|
381
|
-
scraped_pages.each do |page|
|
382
|
-
users << get_user_structs(page)
|
383
|
-
users.flatten!
|
384
|
-
end
|
385
|
-
users
|
386
|
-
end
|
387
|
-
|
388
|
-
def get_followers(user_id)
|
389
|
-
users = []
|
390
|
-
scraped_pages = scrape_followers_page(user_id)
|
391
|
-
scraped_pages.each do |page|
|
392
|
-
users << get_user_structs(page)
|
393
|
-
users.flatten!
|
394
|
-
end
|
395
|
-
users
|
396
|
-
end
|
397
|
-
|
398
|
-
def get_user_structs(page)
|
399
|
-
users = []
|
400
|
-
|
401
|
-
1.upto(NUM_USERS_PER_PAGE) do |i|
|
402
|
-
break if page["user_#{i}_name"].empty?
|
403
|
-
|
404
|
-
user_name = page["user_#{i}_name"]
|
405
|
-
user_id = page["user_#{i}_link"].match(/\/u\/(\d+)$/)[1]
|
406
|
-
user = User.new(user_name, user_id)
|
407
|
-
users << user
|
408
|
-
end
|
409
|
-
|
410
|
-
users
|
411
|
-
end
|
412
|
-
|
413
|
-
def scrape_followings_page(user_id)
|
414
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
415
|
-
return [] unless logged_in?
|
416
|
-
|
417
|
-
followings_page = @agent.get(Bookmeter.followings_uri(user_id))
|
418
|
-
followings_root = Yasuri.struct_books '//*[@id="main_left"]/div' do
|
419
|
-
1.upto(NUM_USERS_PER_PAGE) do |i|
|
420
|
-
send("text_user_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i}]/a/@title")
|
421
|
-
send("text_user_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i}]/a/@href")
|
422
|
-
end
|
423
|
-
end
|
424
|
-
[followings_root.inject(@agent, followings_page)]
|
425
|
-
end
|
426
|
-
|
427
|
-
def scrape_others_followings_page(user_id)
|
428
|
-
scrape_users_listing_page(user_id, :followings_uri)
|
429
|
-
end
|
430
|
-
|
431
|
-
def scrape_followers_page(user_id)
|
432
|
-
scrape_users_listing_page(user_id, :followers_uri)
|
433
|
-
end
|
100
|
+
private
|
434
101
|
|
435
|
-
def
|
436
|
-
raise ArgumentError unless user_id =~
|
437
|
-
raise ArgumentError unless
|
438
|
-
return [] unless logged_in?
|
102
|
+
def fetch_books(user_id, uri_method)
|
103
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
104
|
+
raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
|
439
105
|
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
send("text_user_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i}]/div/div[2]/a/@title")
|
444
|
-
send("text_user_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i}]/div/div[2]/a/@href")
|
445
|
-
end
|
446
|
-
end
|
447
|
-
[root.inject(@agent, page)]
|
106
|
+
books = @scraper.fetch_books(user_id, uri_method)
|
107
|
+
books.each { |book| yield book } if block_given?
|
108
|
+
books.to_a
|
448
109
|
end
|
449
110
|
end
|
450
|
-
|
451
|
-
class BookmeterError < StandardError; end
|
452
111
|
end
|
@@ -1,11 +1,14 @@
|
|
1
|
-
require 'yaml'
|
2
|
-
|
3
1
|
module BookmeterScraper
|
4
2
|
class Configuration
|
5
|
-
|
3
|
+
attr_accessor :mail, :password
|
4
|
+
|
5
|
+
def initialize(config_file = nil)
|
6
|
+
if config_file.nil?
|
7
|
+
@mail = @password = ''
|
8
|
+
return
|
9
|
+
end
|
6
10
|
|
7
|
-
|
8
|
-
config = YAML.load_file(config_file)
|
11
|
+
config = load_yaml_file(config_file)
|
9
12
|
unless config.has_key?('mail') && config.has_key?('password')
|
10
13
|
raise ConfigurationError, "#{config_file}: Invalid configuration file"
|
11
14
|
end
|
@@ -13,6 +16,14 @@ module BookmeterScraper
|
|
13
16
|
@mail = config['mail']
|
14
17
|
@password = config['password']
|
15
18
|
end
|
19
|
+
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def load_yaml_file(config_file)
|
24
|
+
require 'yaml'
|
25
|
+
YAML.load_file(config_file)
|
26
|
+
end
|
16
27
|
end
|
17
28
|
|
18
29
|
class ConfigurationError < StandardError; end
|
@@ -0,0 +1,388 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require 'mechanize'
|
3
|
+
require 'yasuri'
|
4
|
+
|
5
|
+
module BookmeterScraper
|
6
|
+
class Scraper
|
7
|
+
PROFILE_ATTRIBUTES = %i(
|
8
|
+
name
|
9
|
+
gender
|
10
|
+
age
|
11
|
+
blood_type
|
12
|
+
job
|
13
|
+
address
|
14
|
+
url
|
15
|
+
description
|
16
|
+
first_day
|
17
|
+
elapsed_days
|
18
|
+
read_books_count
|
19
|
+
read_pages_count
|
20
|
+
reviews_count
|
21
|
+
bookshelfs_count
|
22
|
+
)
|
23
|
+
Profile = Struct.new(*PROFILE_ATTRIBUTES)
|
24
|
+
|
25
|
+
JP_ATTRIBUTE_NAMES = {
|
26
|
+
gender: '性別',
|
27
|
+
age: '年齢',
|
28
|
+
blood_type: '血液型',
|
29
|
+
job: '職業',
|
30
|
+
address: '現住所',
|
31
|
+
url: 'URL / ブログ',
|
32
|
+
description: '自己紹介',
|
33
|
+
first_day: '記録初日',
|
34
|
+
elapsed_days: '経過日数',
|
35
|
+
read_books_count: '読んだ本',
|
36
|
+
read_pages_count: '読んだページ',
|
37
|
+
reviews_count: '感想/レビュー',
|
38
|
+
bookshelfs_count: '本棚',
|
39
|
+
}
|
40
|
+
|
41
|
+
BOOK_ATTRIBUTES = %i(name author read_dates uri image_uri)
|
42
|
+
Book = Struct.new(*BOOK_ATTRIBUTES)
|
43
|
+
class Books
|
44
|
+
extend Forwardable
|
45
|
+
|
46
|
+
def_delegator :@books, :[]
|
47
|
+
def_delegator :@books, :[]=
|
48
|
+
def_delegator :@books, :<<
|
49
|
+
def_delegator :@books, :each
|
50
|
+
def_delegator :@books, :flatten!
|
51
|
+
def_delegator :@books, :empty?
|
52
|
+
|
53
|
+
def initialize; @books = []; end
|
54
|
+
|
55
|
+
def concat(books)
|
56
|
+
books.each do |book|
|
57
|
+
next if @books.any? { |b| b.name == book.name && b.author == book.author }
|
58
|
+
@books << book
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def to_a; @books; end
|
63
|
+
end
|
64
|
+
|
65
|
+
USER_ATTRIBUTES = %i(name id uri)
|
66
|
+
User = Struct.new(*USER_ATTRIBUTES)
|
67
|
+
|
68
|
+
NUM_BOOKS_PER_PAGE = 40
|
69
|
+
NUM_USERS_PER_PAGE = 20
|
70
|
+
|
71
|
+
attr_accessor :agent
|
72
|
+
|
73
|
+
|
74
|
+
def initialize(agent = nil)
|
75
|
+
@agent = agent
|
76
|
+
@book_pages = {}
|
77
|
+
end
|
78
|
+
|
79
|
+
def fetch_profile(user_id, agent = @agent)
|
80
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
81
|
+
raise ScraperError if agent.nil?
|
82
|
+
|
83
|
+
Profile.new(*scrape_profile(user_id, agent))
|
84
|
+
end
|
85
|
+
|
86
|
+
# Fetches the "mypage" of +user_id+ and returns the profile values as an
# Array ordered like PROFILE_ATTRIBUTES.
#
# Each <dl> under the profile box contributes one label/value pair (first and
# second child respectively); values are looked up through the Japanese
# labels in JP_ATTRIBUTE_NAMES, so attributes without a matching label come
# back as nil. The first slot is always overwritten with the text of the
# <h3> heading in the left-hand column.
#
# Raises ArgumentError for a malformed +user_id+, ScraperError for a nil agent.
def scrape_profile(user_id, agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?

  mypage = agent.get(BookmeterScraper.mypage_uri(user_id))
  dl_tags = mypage.search('#side_left > div.inner > div.profile > dl')

  labels = dl_tags.map { |dl| dl.children[0].children.text }
  values = dl_tags.map { |dl| dl.children[1].children.text }
  value_by_label = Hash[labels.zip(values)]

  scraped = PROFILE_ATTRIBUTES.map do |key|
    value_by_label[JP_ATTRIBUTE_NAMES[key]]
  end
  scraped[0] = mypage.at_css('#side_left > div.inner > h3').text
  scraped
end
|
104
|
+
|
105
|
+
# Collects the books listed for +user_id+ on the listing selected by
# +uri_method+ (the name of a BookmeterScraper method that builds the URI).
#
# Returns a Books collection, or an empty Array when the agent is not
# logged in. Raises ArgumentError for a malformed +user_id+ or an unknown
# +uri_method+, and ScraperError when +agent+ is nil.
def fetch_books(user_id, uri_method, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  books = Books.new
  # Forward the caller's agent. Previously the listing pages were always
  # scraped with @agent, even when an explicit agent had been passed in and
  # validated above.
  scrape_books_pages(user_id, uri_method, agent).each do |page|
    books << extract_books(page)
  end
  # Each push above appends one Array of books; flatten once at the end.
  books.flatten!
  books
end
|
119
|
+
|
120
|
+
# Scrapes the raw book listing page(s) of +user_id+ from the URI built by
# BookmeterScraper.<uri_method>.
#
# Returns an Array with one scraped result per listing page; each result
# exposes "book_<i>_name" / "book_<i>_link" entries for i in
# 1..NUM_BOOKS_PER_PAGE. Returns [] when the agent is not logged in or the
# first page contains no book links.
#
# Raises ArgumentError for a malformed +user_id+ or an unknown +uri_method+,
# and ScraperError when +agent+ is nil.
def scrape_books_pages(user_id, uri_method, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  listing_uri = BookmeterScraper.method(uri_method).call(user_id)
  books_page = agent.get(listing_uri)

  # if books are not found at all
  return [] if books_page.search('#main_left > div > center > a').empty?

  # A "now_page" marker means the listing is paginated: let Yasuri follow the
  # link next to the current-page marker and scrape every page.
  unless books_page.search('span.now_page').empty?
    paged_root = Yasuri.pages_root '//span[@class="now_page"]/following-sibling::span[1]/a' do
      text_page_index '//span[@class="now_page"]/a'
      1.upto(NUM_BOOKS_PER_PAGE) do |i|
        anchor = "//*[@id=\"main_left\"]/div/div[#{i + 1}]/div[2]/a"
        send("text_book_#{i}_name", anchor)
        send("text_book_#{i}_link", "#{anchor}/@href")
      end
    end
    return paged_root.inject(agent, books_page)
  end

  # Single page: scrape it once and wrap the result so callers always get
  # an Array.
  single_root = Yasuri.struct_books '//*[@id="main_left"]/div' do
    1.upto(NUM_BOOKS_PER_PAGE) do |i|
      anchor = "//*[@id=\"main_left\"]/div/div[#{i + 1}]/div[2]/a"
      send("text_book_#{i}_name", anchor)
      send("text_book_#{i}_link", "#{anchor}/@href")
    end
  end
  [single_root.inject(agent, books_page)]
end
|
150
|
+
|
151
|
+
# Builds Book records from one scraped listing +page+.
#
# Walks the "book_<i>_link" entries from 1 upwards and stops at the first
# empty one. For each book the read date (when present) and all re-read
# dates are converted to Time values, and name, author and image URI are
# scraped from the book's own page.
#
# Returns an Array of Book. Raises ArgumentError when +page+ is nil.
def extract_books(page)
  raise ArgumentError if page.nil?

  collected = []
  (1..NUM_BOOKS_PER_PAGE).each do |i|
    book_path = page["book_#{i}_link"]
    break if book_path.empty?

    read_dates = []
    first_read = scrape_read_date(book_path)
    unless first_read.empty?
      read_dates << Time.local(first_read['year'], first_read['month'], first_read['day'])
    end

    # The re-read scrape may yield one date or a list of dates; normalise to
    # a flat list before converting.
    [scrape_reread_date(book_path)].flatten.each do |reread|
      read_dates << Time.local(reread['reread_year'], reread['reread_month'], reread['reread_day'])
    end

    name = scrape_book_name(book_path)
    author = scrape_book_author(book_path)
    image_uri = scrape_book_image_uri(book_path)
    collected << Book.new(name, author, read_dates, ROOT_URI + book_path, image_uri)
  end

  collected
end
|
188
|
+
|
189
|
+
# Collects the read books of +user_id+ that have a read (or re-read) date in
# the month +target_year_month+ (compared against Time.local(year, month)).
#
# Each listing page is bracketed by the month of its first entry and the
# month of its last non-empty entry:
#   * target older than the page's last month  -> skip the page;
#   * target newer than the page's first month -> stop;
#   * otherwise collect from the page, and stop unless the target equals the
#     page's last month (in which case the next page is examined too).
# NOTE(review): this presumes pages and entries are ordered newest first.
#
# Returns a Books collection. Raises ArgumentError for a malformed +user_id+
# or a nil +target_year_month+.
def fetch_read_books(user_id, target_year_month)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError if target_year_month.nil?

  result = Books.new
  scrape_books_pages(user_id, :read_books_uri).each do |page|
    first_date = scrape_read_date(page['book_1_link'])
    last_date = get_last_book_date(page)

    first_month = Time.local(first_date['year'].to_i, first_date['month'].to_i)
    last_month = Time.local(last_date['year'].to_i, last_date['month'].to_i)

    next if target_year_month < last_month
    break if target_year_month > first_month

    result.concat(fetch_target_books(target_year_month, page))
    break if target_year_month > last_month
  end
  result
end
|
217
|
+
|
218
|
+
# Returns the scraped read date of the last non-empty "book_<i>_link" entry
# on +page+, searching from NUM_BOOKS_PER_PAGE down to 1.
#
# Returns an empty Hash when the page has no book link at all. Previously
# the method fell through in that case and returned the Integer receiver of
# +downto+, which is not a date.
#
# Raises ArgumentError when +page+ is nil.
def get_last_book_date(page)
  raise ArgumentError if page.nil?

  NUM_BOOKS_PER_PAGE.downto(1) do |i|
    link = page["book_#{i}_link"]
    return scrape_read_date(link) unless link.empty?
  end

  {}
end
|
227
|
+
|
228
|
+
# Picks from one scraped listing +page+ the books that were read or re-read
# in the month +target_year_month+ (compared against Time.local(year, month)).
#
# Empty "book_<i>_link" slots are skipped (not treated as end of page). A
# selected book carries its read date followed by all of its re-read dates.
#
# Returns a Books collection. Raises ArgumentError when either argument is nil.
def fetch_target_books(target_year_month, page)
  raise ArgumentError if target_year_month.nil?
  raise ArgumentError if page.nil?

  target_books = Books.new
  (1..NUM_BOOKS_PER_PAGE).each do |i|
    book_path = page["book_#{i}_link"]
    next if book_path.empty?

    first_read = scrape_read_date(book_path)
    read_dates = [Time.local(first_read['year'], first_read['month'], first_read['day'])]
    read_year_months = [Time.local(first_read['year'], first_read['month'])]

    # The re-read scrape may yield one date or a list of dates; normalise to
    # a flat list.
    reread_dates = [scrape_reread_date(book_path)].flatten
    reread_dates.each do |reread|
      read_year_months << Time.local(reread['reread_year'], reread['reread_month'])
    end

    next unless read_year_months.include?(target_year_month)

    reread_dates.each do |reread|
      read_dates << Time.local(reread['reread_year'], reread['reread_month'], reread['reread_day'])
    end

    name = scrape_book_name(book_path)
    author = scrape_book_author(book_path)
    image_uri = scrape_book_image_uri(book_path)
    target_books << Book.new(name, author, read_dates, ROOT_URI + book_path, image_uri)
  end

  target_books
end
|
267
|
+
|
268
|
+
# Returns the page for +book_uri+ (a path appended to ROOT_URI), fetching it
# with +agent+ on first use and serving it from @book_pages afterwards.
def get_book_page(book_uri, agent = @agent)
  @book_pages[book_uri] ||= agent.get(ROOT_URI + book_uri)
end
|
272
|
+
|
273
|
+
# Returns the text of the element(s) matching '#title' on the book's page.
def scrape_book_name(book_uri)
  book_page = get_book_page(book_uri)
  book_page.search('#title').text
end
|
276
|
+
|
277
|
+
# Returns the text of the element(s) matching '#author_name' on the book's page.
def scrape_book_author(book_uri)
  book_page = get_book_page(book_uri)
  book_page.search('#author_name').text
end
|
280
|
+
|
281
|
+
# Returns the src attribute text of the element with id "book_image" on the
# book's page.
def scrape_book_image_uri(book_uri)
  book_page = get_book_page(book_uri)
  book_page.search('//*[@id="book_image"]/@src').text
end
|
284
|
+
|
285
|
+
# Scrapes the read date shown in the edit form of the book page at
# +book_uri+, taking the first <option> of the year / month / day selects.
#
# Returns whatever the Yasuri tree yields for the page; callers read the
# 'year', 'month' and 'day' entries and test the result with #empty?.
# NOTE(review): `truncate: /\d+/, proc: :to_i` presumably keeps the digits
# and converts them to an Integer - confirm against the Yasuri docs.
def scrape_read_date(book_uri, agent = @agent)
  book_date = Yasuri.struct_date '//*[@id="book_edit_area"]/form[1]/div[2]' do
    text_year  '//*[@id="read_date_y"]/option[1]', truncate: /\d+/, proc: :to_i
    text_month '//*[@id="read_date_m"]/option[1]', truncate: /\d+/, proc: :to_i
    text_day   '//*[@id="read_date_d"]/option[1]', truncate: /\d+/, proc: :to_i
  end
  # Fetch the book page with the same agent that is handed to Yasuri.
  # Previously an explicit +agent+ was ignored here and @agent was used.
  book_date.inject(agent, get_book_page(book_uri, agent))
end
|
293
|
+
|
294
|
+
# Scrapes the re-read date(s) shown in the "reread_box" forms of the book
# page at +book_uri+, taking the first <option> of each of the three selects.
#
# Returns whatever the Yasuri tree yields for the page; callers flatten the
# result and read the 'reread_year', 'reread_month' and 'reread_day' entries.
# NOTE(review): `truncate: /\d+/, proc: :to_i` presumably keeps the digits
# and converts them to an Integer - confirm against the Yasuri docs.
def scrape_reread_date(book_uri, agent = @agent)
  book_reread_date = Yasuri.struct_reread_date '//*[@id="book_edit_area"]/div/form[1]/div[2]' do
    text_reread_year  '//div[@class="reread_box"]/form[1]/div[2]/select[1]/option[1]', truncate: /\d+/, proc: :to_i
    text_reread_month '//div[@class="reread_box"]/form[1]/div[2]/select[2]/option[1]', truncate: /\d+/, proc: :to_i
    text_reread_day   '//div[@class="reread_box"]/form[1]/div[2]/select[3]/option[1]', truncate: /\d+/, proc: :to_i
  end
  # Fetch the book page with the same agent that is handed to Yasuri.
  # Previously an explicit +agent+ was ignored here and @agent was used.
  book_reread_date.inject(agent, get_book_page(book_uri, agent))
end
|
302
|
+
|
303
|
+
# Returns the users that +user_id+ follows as a flat Array of User records.
#
# The logged-in user's own listing and other users' listings are scraped by
# different helpers; which one applies is decided by comparing +user_id+
# with agent.log_in_user_id.
#
# Returns [] when the agent is not logged in. Raises ArgumentError for a
# malformed +user_id+ and ScraperError when +agent+ is nil.
def fetch_followings(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  scraped_pages =
    if user_id == agent.log_in_user_id
      # Forward the caller's agent; previously this call always fell back to
      # @agent even when an explicit agent had been validated above.
      scrape_followings_page(user_id, agent)
    else
      scrape_others_followings_page(user_id)
    end

  scraped_pages.map { |page| extract_users(page) }.flatten
end
|
317
|
+
|
318
|
+
# Returns the followers of +user_id+ as a flat Array of User records.
#
# Returns [] when the agent is not logged in. Raises ArgumentError for a
# malformed +user_id+ and ScraperError when +agent+ is nil.
def fetch_followers(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  per_page_users = scrape_followers_page(user_id).map { |page| extract_users(page) }
  per_page_users.flatten
end
|
331
|
+
|
332
|
+
# Scrapes the followings listing at BookmeterScraper.followings_uri(user_id)
# using the page layout in which each entry is a direct <a> with title/href
# attributes.
#
# Returns a one-element Array holding the scraped result, which exposes
# "user_<i>_name" / "user_<i>_link" entries for i in 1..NUM_USERS_PER_PAGE,
# or [] when the agent is not logged in.
#
# Raises ArgumentError for a malformed +user_id+ and ScraperError when
# +agent+ is nil. The nil check matches the other scraping methods;
# previously a nil agent surfaced as a NoMethodError.
def scrape_followings_page(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  followings_page = agent.get(BookmeterScraper.followings_uri(user_id))
  followings_root = Yasuri.struct_books '//*[@id="main_left"]/div' do
    1.upto(NUM_USERS_PER_PAGE) do |i|
      send("text_user_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i}]/a/@title")
      send("text_user_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i}]/a/@href")
    end
  end
  [followings_root.inject(agent, followings_page)]
end
|
345
|
+
|
346
|
+
# Scrapes the followings listing of +user_id+ through the generic user
# listing scraper.
#
# +agent+ is optional and defaults to the instance's agent, so existing
# one-argument calls behave as before; passing it lets a caller scrape with
# an explicit agent, like the other scraping methods.
#
# Raises ArgumentError for a malformed +user_id+.
def scrape_others_followings_page(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX

  scrape_users_listing_page(user_id, :followings_uri, agent)
end
|
350
|
+
|
351
|
+
# Scrapes the followers listing of +user_id+ through the generic user
# listing scraper.
#
# +agent+ is optional and defaults to the instance's agent, so existing
# one-argument calls behave as before; passing it lets a caller scrape with
# an explicit agent, like the other scraping methods.
#
# Raises ArgumentError for a malformed +user_id+.
def scrape_followers_page(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX

  scrape_users_listing_page(user_id, :followers_uri, agent)
end
|
355
|
+
|
356
|
+
# Scrapes a user listing page of +user_id+ from the URI built by
# BookmeterScraper.<uri_method>, using the layout in which each entry's link
# sits at div/div[2]/a inside the entry.
#
# Returns a one-element Array holding the scraped result, which exposes
# "user_<i>_name" / "user_<i>_link" entries for i in 1..NUM_USERS_PER_PAGE,
# or [] when the agent is not logged in.
#
# Raises ArgumentError for a malformed +user_id+ or an unknown +uri_method+,
# and ScraperError when +agent+ is nil. The nil check matches the other
# scraping methods; previously a nil agent surfaced as a NoMethodError.
def scrape_users_listing_page(user_id, uri_method, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  page = agent.get(BookmeterScraper.method(uri_method).call(user_id))
  root = Yasuri.struct_users '//*[@id="main_left"]/div' do
    1.upto(NUM_USERS_PER_PAGE) do |i|
      send("text_user_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i}]/div/div[2]/a/@title")
      send("text_user_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i}]/div/div[2]/a/@href")
    end
  end
  [root.inject(agent, page)]
end
|
370
|
+
|
371
|
+
# Builds User records from one scraped user listing +page+.
#
# Walks the "user_<i>_name" entries from 1 upwards and stops at the first
# empty one. The user id is the run of digits after "/u/" at the end of the
# matching "user_<i>_link" entry; the user's URI is rebuilt from ROOT_URI.
#
# Returns an Array of User. Raises ArgumentError when +page+ is nil.
def extract_users(page)
  raise ArgumentError if page.nil?

  found = []
  (1..NUM_USERS_PER_PAGE).each do |i|
    user_name = page["user_#{i}_name"]
    break if user_name.empty?

    id_match = page["user_#{i}_link"].match(/\/u\/(\d+)$/)
    user_id = id_match[1]
    found << User.new(user_name, user_id, ROOT_URI + "/u/#{user_id}")
  end

  found
end
|
385
|
+
end
|
386
|
+
|
387
|
+
# Raised by the fetch_*/scrape_* methods when the agent they would use is nil.
class ScraperError < StandardError; end
|
388
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bookmeter_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kohei Yamamoto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,8 +115,10 @@ files:
|
|
115
115
|
- bookmeter_scraper.gemspec
|
116
116
|
- exe/bookmeter_scraper
|
117
117
|
- lib/bookmeter_scraper.rb
|
118
|
+
- lib/bookmeter_scraper/agent.rb
|
118
119
|
- lib/bookmeter_scraper/bookmeter.rb
|
119
120
|
- lib/bookmeter_scraper/configuration.rb
|
121
|
+
- lib/bookmeter_scraper/scraper.rb
|
120
122
|
- lib/bookmeter_scraper/version.rb
|
121
123
|
homepage: https://github.com/kymmt90/bookmeter_scraper
|
122
124
|
licenses:
|
@@ -138,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
138
140
|
version: '0'
|
139
141
|
requirements: []
|
140
142
|
rubyforge_project:
|
141
|
-
rubygems_version: 2.
|
143
|
+
rubygems_version: 2.5.1
|
142
144
|
signing_key:
|
143
145
|
specification_version: 4
|
144
146
|
summary: Bookmeter scraping library
|