bookmeter_scraper 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.ja.md +42 -5
- data/README.md +40 -4
- data/lib/bookmeter_scraper.rb +46 -0
- data/lib/bookmeter_scraper/agent.rb +59 -0
- data/lib/bookmeter_scraper/bookmeter.rb +52 -393
- data/lib/bookmeter_scraper/configuration.rb +16 -5
- data/lib/bookmeter_scraper/scraper.rb +388 -0
- data/lib/bookmeter_scraper/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eed0f25219959cbcb0f1e74a0db32d7f6ef46de8
|
4
|
+
data.tar.gz: bf5981a2fcb2c933c41720cb99846ac8d1df7dad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 894e75e566f6e547089048bf6872917c79dcb2a9456d36afd59dc624bdd62a67b9bdce23cd811e1035408b14bc7eba48928e03337d50fed102206daee899cf5f
|
7
|
+
data.tar.gz: 3077ac2b3b900537f494ed3fe001cb2be7af6a726293945ad738215ea205ac536d53ac3ad70ba1587dfbcedf4d820387845036c31fc80af92445c4ea2ffd9388
|
data/README.ja.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
# Bookmeter Scraper [![Build Status](https://travis-ci.org/kymmt90/bookmeter_scraper.svg?branch=master)](https://travis-ci.org/kymmt90/bookmeter_scraper)
|
1
|
+
# Bookmeter Scraper [![Build Status](https://travis-ci.org/kymmt90/bookmeter_scraper.svg?branch=master)](https://travis-ci.org/kymmt90/bookmeter_scraper) [![Gem Version](https://badge.fury.io/rb/bookmeter_scraper.svg)](https://badge.fury.io/rb/bookmeter_scraper)
|
2
|
+
|
2
3
|
|
3
4
|
[読書メーター](http://bookmeter.com)の情報をスクレイピングして Ruby で扱えるようにするための gem です。
|
4
5
|
|
@@ -30,10 +31,11 @@ require 'bookmeter_scraper'
|
|
30
31
|
|
31
32
|
書籍情報、お気に入り / お気に入られユーザ情報を取得するには、`Bookmeter.log_in` または `Bookmeter#log_in` でログインしておく必要があります。
|
32
33
|
|
33
|
-
ログイン情報の入力には以下の
|
34
|
+
ログイン情報の入力には以下の 3 通りの方法があります。
|
34
35
|
|
35
36
|
1. 引数として渡す
|
36
37
|
2. `config.yml` へ記述しておく
|
38
|
+
3. ブロック内で設定する
|
37
39
|
|
38
40
|
#### 1. 引数として渡す
|
39
41
|
|
@@ -67,6 +69,28 @@ bookmeter = BookmeterScraper::Bookmeter.log_in
|
|
67
69
|
bookmeter.logged_in? # true
|
68
70
|
```
|
69
71
|
|
72
|
+
#### 3. ブロック内で設定する
|
73
|
+
|
74
|
+
以下のように `Bookmeter.log_in` へブロックを渡すことで、ログインできます。
|
75
|
+
|
76
|
+
```ruby
|
77
|
+
bookmeter = BookmeterScraper::Bookmeter.log_in do |configuration|
|
78
|
+
configuration.mail = 'example@example.com'
|
79
|
+
configuration.password = 'password'
|
80
|
+
end
|
81
|
+
bookmeter.logged_in? # true
|
82
|
+
```
|
83
|
+
|
84
|
+
`Bookmeter#log_in` でもログイン可能です。
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
bookmeter = BookmeterScraper::Bookmeter.new
|
88
|
+
bookmeter.log_in do |configuration|
|
89
|
+
configuration.mail = 'example@example.com'
|
90
|
+
configuration.password = 'password'
|
91
|
+
end
|
92
|
+
```
|
93
|
+
|
70
94
|
### 書籍情報の取得
|
71
95
|
|
72
96
|
以下の書籍情報
|
@@ -76,7 +100,7 @@ bookmeter.logged_in? # true
|
|
76
100
|
- 積読本
|
77
101
|
- 読みたい本
|
78
102
|
|
79
|
-
|
103
|
+
を取得できます。取得には `Bookmeter.log_in` などによる事前のログインが必要です。
|
80
104
|
|
81
105
|
#### 読んだ本
|
82
106
|
|
@@ -92,13 +116,17 @@ bookmeter.read_books('01010101') # 他のユーザの ID を指定して、
|
|
92
116
|
- 書名 `name`
|
93
117
|
- 著者 `author`
|
94
118
|
- 読了日(初読了日と再読日の両方)の配列 `read_dates`
|
119
|
+
- 読書メーター内の書籍ページの URI `uri`
|
120
|
+
- 書籍の表紙画像 URI `image_uri`
|
95
121
|
|
96
|
-
を属性として持つ `
|
122
|
+
を属性として持つ `Book` の配列として取得できます。
|
97
123
|
|
98
124
|
```ruby
|
99
125
|
books[0].name
|
100
126
|
books[0].author
|
101
127
|
books[0].read_dates
|
128
|
+
books[0].uri
|
129
|
+
books[0].image_uri
|
102
130
|
```
|
103
131
|
|
104
132
|
さらに、`Bookmeter#read_books_in` で特定年月の「読んだ本」情報が取得できます。
|
@@ -129,6 +157,8 @@ books = bookmeter.reading_books # ログインユーザの「読んでる本
|
|
129
157
|
books[0].name
|
130
158
|
books[0].author
|
131
159
|
books[0].read_dates # 読了日の Array は空
|
160
|
+
books[0].uri
|
161
|
+
books[0].image_uri
|
132
162
|
|
133
163
|
bookmeter.tsundoku # ログインユーザの「積読本」を取得
|
134
164
|
bookmeter.wish_list # ログインユーザの「読みたい本」を取得
|
@@ -143,13 +173,20 @@ following_users = bookmeter.followings # 「お気に入り」ユーザの情
|
|
143
173
|
followers = bookmeter.followers # 「お気に入られ」ユーザの情報を取得
|
144
174
|
```
|
145
175
|
|
146
|
-
|
176
|
+
ユーザ情報は
|
177
|
+
|
178
|
+
- ユーザ名 `name`
|
179
|
+
- ユーザ ID `id`
|
180
|
+
- 読書メーター内のユーザページの URI `uri`
|
181
|
+
|
182
|
+
を持つ `User` の配列として取得できます。
|
147
183
|
|
148
184
|
```ruby
|
149
185
|
following_users[0].name
|
150
186
|
following_users[0].id
|
151
187
|
followers[0].name
|
152
188
|
followers[0].id
|
189
|
+
followers[0].uri
|
153
190
|
```
|
154
191
|
|
155
192
|
#### 注意
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Bookmeter Scraper [![Build Status](https://travis-ci.org/kymmt90/bookmeter_scraper.svg?branch=master)](https://travis-ci.org/kymmt90/bookmeter_scraper)
|
1
|
+
# Bookmeter Scraper [![Build Status](https://travis-ci.org/kymmt90/bookmeter_scraper.svg?branch=master)](https://travis-ci.org/kymmt90/bookmeter_scraper) [![Gem Version](https://badge.fury.io/rb/bookmeter_scraper.svg)](https://badge.fury.io/rb/bookmeter_scraper)
|
2
2
|
|
3
3
|
A library for scraping [Bookmeter](http://bookmeter.com).
|
4
4
|
|
@@ -34,10 +34,11 @@ require 'bookmeter_scraper'
|
|
34
34
|
|
35
35
|
You need to log in to Bookmeter with `Bookmeter.log_in` or `Bookmeter#log_in` to get books and followings / followers information.
|
36
36
|
|
37
|
-
There are
|
37
|
+
There are 3 ways to input authentication information:
|
38
38
|
|
39
39
|
1. Passing as arguments
|
40
40
|
2. Writing out to `config.yml`
|
41
|
+
3. Configuring in a block
|
41
42
|
|
42
43
|
#### 1. Passing as arguments
|
43
44
|
|
@@ -71,6 +72,27 @@ bookmeter = BookmeterScraper::Bookmeter.log_in
|
|
71
72
|
bookmeter.logged_in? # true
|
72
73
|
```
|
73
74
|
|
75
|
+
#### 3. Configuring in a block
|
76
|
+
|
77
|
+
You can configure your mail address and password in a block.
|
78
|
+
|
79
|
+
```ruby
|
80
|
+
bookmeter = BookmeterScraper::Bookmeter.log_in do |configuration|
|
81
|
+
configuration.mail = 'example@example.com'
|
82
|
+
configuration.password = 'password'
|
83
|
+
end
|
84
|
+
bookmeter.logged_in? # true
|
85
|
+
```
|
86
|
+
|
87
|
+
`Bookmeter#log_in` is also available:
|
88
|
+
|
89
|
+
```ruby
|
90
|
+
bookmeter = BookmeterScraper::Bookmeter.new
|
91
|
+
bookmeter.log_in do |configuration|
|
92
|
+
configuration.mail = 'example@example.com'
|
93
|
+
configuration.password = 'password'
|
94
|
+
end
|
95
|
+
```
|
74
96
|
|
75
97
|
### Get books information
|
76
98
|
|
@@ -92,12 +114,20 @@ books = bookmeter.read_books # get read books of the logged in user
|
|
92
114
|
bookmeter.read_books('01010101') # get read books of a user specified by ID
|
93
115
|
```
|
94
116
|
|
95
|
-
Books infomation is an array of `
|
117
|
+
Books information is an array of `Book` which has these attributes:
|
118
|
+
|
119
|
+
- `name`
|
120
|
+
- `read_dates`
|
121
|
+
- `uri`
|
122
|
+
- `image_uri`
|
123
|
+
|
96
124
|
`read_dates` is an array of finished reading dates (first finished date and reread dates):
|
97
125
|
|
98
126
|
```ruby
|
99
127
|
books[0].name
|
100
128
|
books[0].read_dates
|
129
|
+
books[0].uri
|
130
|
+
books[0].image_uri
|
101
131
|
```
|
102
132
|
|
103
133
|
To specify year-month for read books, you can use `Bookmeter#read_books_in`:
|
@@ -135,13 +165,19 @@ followers = bookmeter.followers
|
|
135
165
|
|
136
166
|
You need to log in to Bookmeter in advance to get this information.
|
137
167
|
|
138
|
-
Users information is an array of `Struct` which has
|
168
|
+
Users information is an array of `Struct` which has the following attributes:
|
169
|
+
|
170
|
+
- `name`
|
171
|
+
- `id`
|
172
|
+
- `uri`
|
139
173
|
|
140
174
|
```ruby
|
141
175
|
following_users[0].name
|
142
176
|
following_users[0].id
|
177
|
+
following_users[0].uri
|
143
178
|
followers[0].name
|
144
179
|
followers[0].id
|
180
|
+
followers[0].uri
|
145
181
|
```
|
146
182
|
|
147
183
|
#### Notice
|
data/lib/bookmeter_scraper.rb
CHANGED
@@ -1,3 +1,49 @@
|
|
1
1
|
require 'bookmeter_scraper/bookmeter'
|
2
2
|
require 'bookmeter_scraper/configuration'
|
3
3
|
require 'bookmeter_scraper/version'
|
4
|
+
|
5
|
+
module BookmeterScraper
|
6
|
+
ROOT_URI = 'http://bookmeter.com'.freeze
|
7
|
+
LOGIN_URI = "#{ROOT_URI}/login".freeze
|
8
|
+
|
9
|
+
USER_ID_REGEX = /^\d+$/
|
10
|
+
|
11
|
+
class << self
|
12
|
+
def mypage_uri(user_id)
|
13
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
14
|
+
"#{ROOT_URI}/u/#{user_id}"
|
15
|
+
end
|
16
|
+
|
17
|
+
def read_books_uri(user_id)
|
18
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
19
|
+
"#{ROOT_URI}/u/#{user_id}/booklist"
|
20
|
+
end
|
21
|
+
|
22
|
+
def reading_books_uri(user_id)
|
23
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
24
|
+
"#{ROOT_URI}/u/#{user_id}/booklistnow"
|
25
|
+
end
|
26
|
+
|
27
|
+
def tsundoku_uri(user_id)
|
28
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
29
|
+
"#{ROOT_URI}/u/#{user_id}/booklisttun"
|
30
|
+
end
|
31
|
+
|
32
|
+
def wish_list_uri(user_id)
|
33
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
34
|
+
"#{ROOT_URI}/u/#{user_id}/booklistpre"
|
35
|
+
end
|
36
|
+
|
37
|
+
def followings_uri(user_id)
|
38
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
39
|
+
"#{ROOT_URI}/u/#{user_id}/favorite_user"
|
40
|
+
end
|
41
|
+
|
42
|
+
def followers_uri(user_id)
|
43
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
44
|
+
"#{ROOT_URI}/u/#{user_id}/favorited_user"
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
class BookmeterError < StandardError; end
|
49
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module BookmeterScraper
|
4
|
+
class Agent
|
5
|
+
extend Forwardable
|
6
|
+
def_delegator :@agent, :get
|
7
|
+
def_delegator :@agent, :click
|
8
|
+
|
9
|
+
attr_reader :log_in_user_id
|
10
|
+
|
11
|
+
|
12
|
+
def initialize
|
13
|
+
@agent = Mechanize.new do |a|
|
14
|
+
a.user_agent_alias = Mechanize::AGENT_ALIASES.keys.reject do |ua_alias|
|
15
|
+
%w(Android iPad iPhone Mechanize).include?(ua_alias)
|
16
|
+
end.sample
|
17
|
+
end
|
18
|
+
@log_in_user_id = nil
|
19
|
+
end
|
20
|
+
|
21
|
+
def log_in(config)
|
22
|
+
raise ArgumentError if config.nil?
|
23
|
+
|
24
|
+
page_after_submitting_form = nil
|
25
|
+
@agent.get(BookmeterScraper::LOGIN_URI) do |page|
|
26
|
+
page_after_submitting_form = page.form_with(action: '/login') do |form|
|
27
|
+
form.field_with(name: 'mail').value = config.mail
|
28
|
+
form.field_with(name: 'password').value = config.password
|
29
|
+
end.submit
|
30
|
+
end
|
31
|
+
|
32
|
+
if page_after_logging_in? page_after_submitting_form
|
33
|
+
mypage = page_after_submitting_form.link_with(text: 'マイページ').click
|
34
|
+
@log_in_user_id = extract_user_id(mypage)
|
35
|
+
else
|
36
|
+
nil
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def logged_in?
|
41
|
+
!@log_in_user_id.nil?
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
private
|
46
|
+
|
47
|
+
def page_after_logging_in?(page)
|
48
|
+
raise ArgumentError if page.nil?
|
49
|
+
|
50
|
+
page.uri.to_s == BookmeterScraper::ROOT_URI + '/'
|
51
|
+
end
|
52
|
+
|
53
|
+
def extract_user_id(page)
|
54
|
+
raise ArgumentError if page.nil?
|
55
|
+
|
56
|
+
page.uri.to_s.match(/\/u\/(\d+)$/)[1]
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -1,130 +1,51 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
3
|
-
require 'yasuri'
|
1
|
+
require 'bookmeter_scraper/agent'
|
2
|
+
require 'bookmeter_scraper/scraper'
|
4
3
|
|
5
4
|
module BookmeterScraper
|
6
5
|
class Bookmeter
|
7
6
|
DEFAULT_CONFIG_PATH = './config.yml'.freeze
|
8
7
|
|
9
|
-
ROOT_URI = 'http://bookmeter.com'.freeze
|
10
|
-
LOGIN_URI = "#{ROOT_URI}/login".freeze
|
11
|
-
|
12
|
-
PROFILE_ATTRIBUTES = %i(name gender age blood_type job address url description first_day elapsed_days read_books_count read_pages_count reviews_count bookshelfs_count)
|
13
|
-
Profile = Struct.new(*PROFILE_ATTRIBUTES)
|
14
|
-
|
15
|
-
BOOK_ATTRIBUTES = %i(name author read_dates)
|
16
|
-
Book = Struct.new(*BOOK_ATTRIBUTES)
|
17
|
-
class Books
|
18
|
-
extend Forwardable
|
19
|
-
|
20
|
-
def_delegator :@books, :[]
|
21
|
-
def_delegator :@books, :[]=
|
22
|
-
def_delegator :@books, :<<
|
23
|
-
def_delegator :@books, :each
|
24
|
-
def_delegator :@books, :flatten!
|
25
|
-
|
26
|
-
def initialize; @books = []; end
|
27
|
-
|
28
|
-
def concat(books)
|
29
|
-
books.each do |book|
|
30
|
-
next if @books.any? { |b| b.name == book.name && b.author == book.author }
|
31
|
-
@books << book
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_a; @books; end
|
36
|
-
end
|
37
|
-
|
38
|
-
USER_ATTRIBUTES = %i(name id)
|
39
|
-
User = Struct.new(*USER_ATTRIBUTES)
|
40
|
-
|
41
|
-
JP_ATTRIBUTE_NAMES = {
|
42
|
-
gender: '性別',
|
43
|
-
age: '年齢',
|
44
|
-
blood_type: '血液型',
|
45
|
-
job: '職業',
|
46
|
-
address: '現住所',
|
47
|
-
url: 'URL / ブログ',
|
48
|
-
description: '自己紹介',
|
49
|
-
first_day: '記録初日',
|
50
|
-
elapsed_days: '経過日数',
|
51
|
-
read_books_count: '読んだ本',
|
52
|
-
read_pages_count: '読んだページ',
|
53
|
-
reviews_count: '感想/レビュー',
|
54
|
-
bookshelfs_count: '本棚',
|
55
|
-
}
|
56
|
-
|
57
|
-
NUM_BOOKS_PER_PAGE = 40
|
58
|
-
NUM_USERS_PER_PAGE = 20
|
59
|
-
|
60
8
|
attr_reader :log_in_user_id
|
61
9
|
|
62
|
-
def self.mypage_uri(user_id)
|
63
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
64
|
-
"#{ROOT_URI}/u/#{user_id}"
|
65
|
-
end
|
66
|
-
|
67
|
-
def self.read_books_uri(user_id)
|
68
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
69
|
-
"#{ROOT_URI}/u/#{user_id}/booklist"
|
70
|
-
end
|
71
|
-
|
72
|
-
def self.reading_books_uri(user_id)
|
73
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
74
|
-
"#{ROOT_URI}/u/#{user_id}/booklistnow"
|
75
|
-
end
|
76
|
-
|
77
|
-
def self.tsundoku_uri(user_id)
|
78
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
79
|
-
"#{ROOT_URI}/u/#{user_id}/booklisttun"
|
80
|
-
end
|
81
|
-
|
82
|
-
def self.wish_list_uri(user_id)
|
83
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
84
|
-
"#{ROOT_URI}/u/#{user_id}/booklistpre"
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.followings_uri(user_id)
|
88
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
89
|
-
"#{ROOT_URI}/u/#{user_id}/favorite_user"
|
90
|
-
end
|
91
|
-
|
92
|
-
def self.followers_uri(user_id)
|
93
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
94
|
-
"#{ROOT_URI}/u/#{user_id}/favorited_user"
|
95
|
-
end
|
96
10
|
|
97
|
-
|
98
|
-
|
99
|
-
|
11
|
+
class << self
|
12
|
+
def log_in(mail = nil, password = nil)
|
13
|
+
Bookmeter.new.tap do |bookmeter|
|
14
|
+
if block_given?
|
15
|
+
config = Configuration.new
|
16
|
+
yield config
|
17
|
+
bookmeter.log_in(config.mail, config.password)
|
18
|
+
else
|
19
|
+
bookmeter.log_in(mail, password)
|
20
|
+
end
|
21
|
+
end
|
100
22
|
end
|
101
23
|
end
|
102
24
|
|
103
25
|
|
104
26
|
def initialize(agent = nil)
|
105
|
-
@agent
|
106
|
-
@
|
27
|
+
@agent = agent.nil? ? Agent.new : agent
|
28
|
+
@scraper = Scraper.new(@agent)
|
29
|
+
@logged_in = false
|
107
30
|
@log_in_user_id = nil
|
108
|
-
@book_pages = {}
|
109
31
|
end
|
110
32
|
|
111
33
|
def log_in(mail = nil, password = nil)
|
112
34
|
raise BookmeterError if @agent.nil?
|
113
35
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
return unless logged_in?
|
36
|
+
configuration = if block_given?
|
37
|
+
Configuration.new.tap { |config| yield config }
|
38
|
+
elsif mail.nil? && password.nil?
|
39
|
+
Configuration.new(DEFAULT_CONFIG_PATH)
|
40
|
+
else
|
41
|
+
Configuration.new.tap do |config|
|
42
|
+
config.mail = mail
|
43
|
+
config.password = password
|
44
|
+
end
|
45
|
+
end
|
125
46
|
|
126
|
-
|
127
|
-
@
|
47
|
+
@log_in_user_id = @agent.log_in(configuration)
|
48
|
+
@logged_in = !@log_in_user_id.nil?
|
128
49
|
end
|
129
50
|
|
130
51
|
def logged_in?
|
@@ -132,321 +53,59 @@ module BookmeterScraper
|
|
132
53
|
end
|
133
54
|
|
134
55
|
def profile(user_id)
|
135
|
-
raise ArgumentError unless user_id =~
|
136
|
-
|
137
|
-
mypage = @agent.get(Bookmeter.mypage_uri(user_id))
|
138
|
-
|
139
|
-
profile_dl_tags = mypage.search('#side_left > div.inner > div.profile > dl')
|
140
|
-
jp_attribute_names = profile_dl_tags.map { |i| i.children[0].children.text }
|
141
|
-
attribute_values = profile_dl_tags.map { |i| i.children[1].children.text }
|
142
|
-
jp_attributes = Hash[jp_attribute_names.zip(attribute_values)]
|
143
|
-
attributes = PROFILE_ATTRIBUTES.map do |attribute|
|
144
|
-
jp_attributes[JP_ATTRIBUTE_NAMES[attribute]]
|
145
|
-
end
|
146
|
-
attributes[0] = mypage.at_css('#side_left > div.inner > h3').text
|
147
|
-
|
148
|
-
Profile.new(*attributes)
|
56
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
57
|
+
@scraper.fetch_profile(user_id)
|
149
58
|
end
|
150
59
|
|
151
60
|
def read_books(user_id = @log_in_user_id)
|
152
|
-
|
153
|
-
|
154
|
-
books.to_a
|
61
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
62
|
+
fetch_books(user_id, :read_books_uri)
|
155
63
|
end
|
156
64
|
|
157
65
|
def read_books_in(year, month, user_id = @log_in_user_id)
|
66
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
67
|
+
|
158
68
|
date = Time.local(year, month)
|
159
|
-
books =
|
69
|
+
books = @scraper.fetch_read_books(user_id, date)
|
160
70
|
books.each { |b| yield b } if block_given?
|
161
71
|
books.to_a
|
162
72
|
end
|
163
73
|
|
164
74
|
def reading_books(user_id = @log_in_user_id)
|
165
|
-
|
166
|
-
|
167
|
-
books.to_a
|
75
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
76
|
+
fetch_books(user_id, :reading_books_uri)
|
168
77
|
end
|
169
78
|
|
170
79
|
def tsundoku(user_id = @log_in_user_id)
|
171
|
-
|
172
|
-
|
173
|
-
books.to_a
|
80
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
81
|
+
fetch_books(user_id, :tsundoku_uri)
|
174
82
|
end
|
175
83
|
|
176
84
|
def wish_list(user_id = @log_in_user_id)
|
177
|
-
|
178
|
-
|
179
|
-
books.to_a
|
85
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
86
|
+
fetch_books(user_id, :wish_list_uri)
|
180
87
|
end
|
181
88
|
|
182
89
|
def followings(user_id = @log_in_user_id)
|
183
|
-
|
90
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
91
|
+
@scraper.fetch_followings(user_id)
|
184
92
|
end
|
185
93
|
|
186
94
|
def followers(user_id = @log_in_user_id)
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
private
|
191
|
-
|
192
|
-
def self.new_agent
|
193
|
-
agent = Mechanize.new do |a|
|
194
|
-
a.user_agent_alias = Mechanize::AGENT_ALIASES.keys.reject do |ua_alias|
|
195
|
-
%w(Android iPad iPhone Mechanize).include?(ua_alias)
|
196
|
-
end.sample
|
197
|
-
end
|
198
|
-
end
|
199
|
-
|
200
|
-
def extract_user_id(page)
|
201
|
-
page.uri.to_s.match(/\/u\/(\d+)$/)[1]
|
202
|
-
end
|
203
|
-
|
204
|
-
def get_books(user_id, uri_method)
|
205
|
-
books = Books.new
|
206
|
-
scraped_pages = scrape_book_pages(user_id, uri_method)
|
207
|
-
scraped_pages.each do |page|
|
208
|
-
books << get_book_structs(page)
|
209
|
-
books.flatten!
|
210
|
-
end
|
211
|
-
books
|
95
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
96
|
+
@scraper.fetch_followers(user_id)
|
212
97
|
end
|
213
98
|
|
214
|
-
def get_read_books(user_id, target_ym)
|
215
|
-
result = Books.new
|
216
|
-
scrape_book_pages(user_id, :read_books_uri).each do |page|
|
217
|
-
first_book_date = get_read_date(page['book_1_link'])
|
218
|
-
last_book_date = get_last_book_date(page)
|
219
|
-
|
220
|
-
first_book_ym = Time.local(first_book_date['year'].to_i, first_book_date['month'].to_i)
|
221
|
-
last_book_ym = Time.local(last_book_date['year'].to_i, last_book_date['month'].to_i)
|
222
|
-
|
223
|
-
if target_ym < last_book_ym
|
224
|
-
next
|
225
|
-
elsif target_ym == first_book_ym && target_ym > last_book_ym
|
226
|
-
result.concat(get_target_books(target_ym, page))
|
227
|
-
break
|
228
|
-
elsif target_ym < first_book_ym && target_ym > last_book_ym
|
229
|
-
result.concat(get_target_books(target_ym, page))
|
230
|
-
break
|
231
|
-
elsif target_ym <= first_book_ym && target_ym >= last_book_ym
|
232
|
-
result.concat(get_target_books(target_ym, page))
|
233
|
-
elsif target_ym > first_book_ym
|
234
|
-
break
|
235
|
-
end
|
236
|
-
end
|
237
|
-
result
|
238
|
-
end
|
239
|
-
|
240
|
-
def get_last_book_date(page)
|
241
|
-
NUM_BOOKS_PER_PAGE.downto(1) do |i|
|
242
|
-
link = page["book_#{i}_link"]
|
243
|
-
next if link.empty?
|
244
|
-
return get_read_date(link)
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
def get_target_books(target_ym, page)
|
249
|
-
target_books = Books.new
|
250
|
-
|
251
|
-
1.upto(NUM_BOOKS_PER_PAGE) do |i|
|
252
|
-
next if page["book_#{i}_link"].empty?
|
253
|
-
|
254
|
-
read_yms = []
|
255
|
-
read_date = get_read_date(page["book_#{i}_link"])
|
256
|
-
read_dates = [Time.local(read_date['year'], read_date['month'], read_date['day'])]
|
257
|
-
read_yms << Time.local(read_date['year'], read_date['month'])
|
258
|
-
|
259
|
-
reread_dates = []
|
260
|
-
reread_dates << get_reread_date(page["book_#{i}_link"])
|
261
|
-
reread_dates.flatten!
|
262
|
-
|
263
|
-
unless reread_dates.empty?
|
264
|
-
reread_dates.each do |date|
|
265
|
-
read_yms << Time.local(date['reread_year'], date['reread_month'])
|
266
|
-
end
|
267
|
-
end
|
268
99
|
|
269
|
-
|
270
|
-
|
271
|
-
unless reread_dates.empty?
|
272
|
-
reread_dates.each do |date|
|
273
|
-
read_dates << Time.local(date['reread_year'], date['reread_month'], date['reread_day'])
|
274
|
-
end
|
275
|
-
end
|
276
|
-
book_name = get_book_name(page["book_#{i}_link"])
|
277
|
-
book_author = get_book_author(page["book_#{i}_link"])
|
278
|
-
book = Book.new(book_name, book_author, read_dates)
|
279
|
-
target_books << book
|
280
|
-
end
|
281
|
-
|
282
|
-
target_books
|
283
|
-
end
|
284
|
-
|
285
|
-
def scrape_book_pages(user_id, uri_method)
|
286
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
287
|
-
raise ArgumentError unless Bookmeter.methods.include?(uri_method)
|
288
|
-
return [] unless logged_in?
|
289
|
-
|
290
|
-
books_page = @agent.get(Bookmeter.method(uri_method).call(user_id))
|
291
|
-
|
292
|
-
# if books are not found at all
|
293
|
-
return [] if books_page.search('#main_left > div > center > a').empty?
|
294
|
-
|
295
|
-
if books_page.search('span.now_page').empty?
|
296
|
-
books_root = Yasuri.struct_books '//*[@id="main_left"]/div' do
|
297
|
-
1.upto(NUM_BOOKS_PER_PAGE) do |i|
|
298
|
-
send("text_book_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i + 1}]/div[2]/a")
|
299
|
-
send("text_book_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i + 1}]/div[2]/a/@href")
|
300
|
-
end
|
301
|
-
end
|
302
|
-
return [books_root.inject(@agent, books_page)]
|
303
|
-
end
|
304
|
-
|
305
|
-
books_root = Yasuri.pages_root '//span[@class="now_page"]/following-sibling::span[1]/a' do
|
306
|
-
text_page_index '//span[@class="now_page"]/a'
|
307
|
-
1.upto(NUM_BOOKS_PER_PAGE) do |i|
|
308
|
-
send("text_book_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i + 1}]/div[2]/a")
|
309
|
-
send("text_book_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i + 1}]/div[2]/a/@href")
|
310
|
-
end
|
311
|
-
end
|
312
|
-
books_root.inject(@agent, books_page)
|
313
|
-
end
|
314
|
-
|
315
|
-
def get_book_page(book_uri)
|
316
|
-
@book_pages[book_uri] = @agent.get(ROOT_URI + book_uri) unless @book_pages[book_uri]
|
317
|
-
@book_pages[book_uri]
|
318
|
-
end
|
319
|
-
|
320
|
-
def get_book_name(book_uri)
|
321
|
-
get_book_page(book_uri).search('#title').text
|
322
|
-
end
|
323
|
-
|
324
|
-
def get_book_author(book_uri)
|
325
|
-
get_book_page(book_uri).search('#author_name').text
|
326
|
-
end
|
327
|
-
|
328
|
-
def get_read_date(book_uri)
|
329
|
-
book_date = Yasuri.struct_date '//*[@id="book_edit_area"]/form[1]/div[2]' do
|
330
|
-
text_year '//*[@id="read_date_y"]/option[1]', truncate: /\d+/, proc: :to_i
|
331
|
-
text_month '//*[@id="read_date_m"]/option[1]', truncate: /\d+/, proc: :to_i
|
332
|
-
text_day '//*[@id="read_date_d"]/option[1]', truncate: /\d+/, proc: :to_i
|
333
|
-
end
|
334
|
-
book_date.inject(@agent, get_book_page(book_uri))
|
335
|
-
end
|
336
|
-
|
337
|
-
def get_reread_date(book_uri)
|
338
|
-
book_reread_date = Yasuri.struct_reread_date '//*[@id="book_edit_area"]/div/form[1]/div[2]' do
|
339
|
-
text_reread_year '//div[@class="reread_box"]/form[1]/div[2]/select[1]/option[1]', truncate: /\d+/, proc: :to_i
|
340
|
-
text_reread_month '//div[@class="reread_box"]/form[1]/div[2]/select[2]/option[1]', truncate: /\d+/, proc: :to_i
|
341
|
-
text_reread_day '//div[@class="reread_box"]/form[1]/div[2]/select[3]/option[1]', truncate: /\d+/, proc: :to_i
|
342
|
-
end
|
343
|
-
book_reread_date.inject(@agent, get_book_page(book_uri))
|
344
|
-
end
|
345
|
-
|
346
|
-
def get_book_structs(page)
|
347
|
-
books = []
|
348
|
-
|
349
|
-
1.upto(NUM_BOOKS_PER_PAGE) do |i|
|
350
|
-
break if page["book_#{i}_link"].empty?
|
351
|
-
|
352
|
-
read_dates = []
|
353
|
-
read_date = get_read_date(page["book_#{i}_link"])
|
354
|
-
unless read_date.empty?
|
355
|
-
read_dates << Time.local(read_date['year'], read_date['month'], read_date['day'])
|
356
|
-
end
|
357
|
-
|
358
|
-
reread_dates = []
|
359
|
-
reread_dates << get_reread_date(page["book_#{i}_link"])
|
360
|
-
reread_dates.flatten!
|
361
|
-
|
362
|
-
unless reread_dates.empty?
|
363
|
-
reread_dates.each do |date|
|
364
|
-
read_dates << Time.local(date['reread_year'], date['reread_month'], date['reread_day'])
|
365
|
-
end
|
366
|
-
end
|
367
|
-
|
368
|
-
book_name = get_book_name(page["book_#{i}_link"])
|
369
|
-
book_author = get_book_author(page["book_#{i}_link"])
|
370
|
-
book = Book.new(book_name, book_author, read_dates)
|
371
|
-
books << book
|
372
|
-
end
|
373
|
-
|
374
|
-
books
|
375
|
-
end
|
376
|
-
|
377
|
-
def get_followings(user_id)
|
378
|
-
users = []
|
379
|
-
scraped_pages = user_id == @log_in_user_id ? scrape_followings_page(user_id)
|
380
|
-
: scrape_others_followings_page(user_id)
|
381
|
-
scraped_pages.each do |page|
|
382
|
-
users << get_user_structs(page)
|
383
|
-
users.flatten!
|
384
|
-
end
|
385
|
-
users
|
386
|
-
end
|
387
|
-
|
388
|
-
def get_followers(user_id)
|
389
|
-
users = []
|
390
|
-
scraped_pages = scrape_followers_page(user_id)
|
391
|
-
scraped_pages.each do |page|
|
392
|
-
users << get_user_structs(page)
|
393
|
-
users.flatten!
|
394
|
-
end
|
395
|
-
users
|
396
|
-
end
|
397
|
-
|
398
|
-
def get_user_structs(page)
|
399
|
-
users = []
|
400
|
-
|
401
|
-
1.upto(NUM_USERS_PER_PAGE) do |i|
|
402
|
-
break if page["user_#{i}_name"].empty?
|
403
|
-
|
404
|
-
user_name = page["user_#{i}_name"]
|
405
|
-
user_id = page["user_#{i}_link"].match(/\/u\/(\d+)$/)[1]
|
406
|
-
user = User.new(user_name, user_id)
|
407
|
-
users << user
|
408
|
-
end
|
409
|
-
|
410
|
-
users
|
411
|
-
end
|
412
|
-
|
413
|
-
def scrape_followings_page(user_id)
|
414
|
-
raise ArgumentError unless user_id =~ /^\d+$/
|
415
|
-
return [] unless logged_in?
|
416
|
-
|
417
|
-
followings_page = @agent.get(Bookmeter.followings_uri(user_id))
|
418
|
-
followings_root = Yasuri.struct_books '//*[@id="main_left"]/div' do
|
419
|
-
1.upto(NUM_USERS_PER_PAGE) do |i|
|
420
|
-
send("text_user_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i}]/a/@title")
|
421
|
-
send("text_user_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i}]/a/@href")
|
422
|
-
end
|
423
|
-
end
|
424
|
-
[followings_root.inject(@agent, followings_page)]
|
425
|
-
end
|
426
|
-
|
427
|
-
def scrape_others_followings_page(user_id)
|
428
|
-
scrape_users_listing_page(user_id, :followings_uri)
|
429
|
-
end
|
430
|
-
|
431
|
-
def scrape_followers_page(user_id)
|
432
|
-
scrape_users_listing_page(user_id, :followers_uri)
|
433
|
-
end
|
100
|
+
private
|
434
101
|
|
435
|
-
def
|
436
|
-
raise ArgumentError unless user_id =~
|
437
|
-
raise ArgumentError unless
|
438
|
-
return [] unless logged_in?
|
102
|
+
def fetch_books(user_id, uri_method)
|
103
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
104
|
+
raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
|
439
105
|
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
send("text_user_#{i}_name", "//*[@id=\"main_left\"]/div/div[#{i}]/div/div[2]/a/@title")
|
444
|
-
send("text_user_#{i}_link", "//*[@id=\"main_left\"]/div/div[#{i}]/div/div[2]/a/@href")
|
445
|
-
end
|
446
|
-
end
|
447
|
-
[root.inject(@agent, page)]
|
106
|
+
books = @scraper.fetch_books(user_id, uri_method)
|
107
|
+
books.each { |book| yield book } if block_given?
|
108
|
+
books.to_a
|
448
109
|
end
|
449
110
|
end
|
450
|
-
|
451
|
-
class BookmeterError < StandardError; end
|
452
111
|
end
|
@@ -1,11 +1,14 @@
|
|
1
|
-
require 'yaml'
|
2
|
-
|
3
1
|
module BookmeterScraper
|
4
2
|
class Configuration
|
5
|
-
|
3
|
+
attr_accessor :mail, :password
|
4
|
+
|
5
|
+
def initialize(config_file = nil)
|
6
|
+
if config_file.nil?
|
7
|
+
@mail = @password = ''
|
8
|
+
return
|
9
|
+
end
|
6
10
|
|
7
|
-
|
8
|
-
config = YAML.load_file(config_file)
|
11
|
+
config = load_yaml_file(config_file)
|
9
12
|
unless config.has_key?('mail') && config.has_key?('password')
|
10
13
|
raise ConfigurationError, "#{config_file}: Invalid configuration file"
|
11
14
|
end
|
@@ -13,6 +16,14 @@ module BookmeterScraper
|
|
13
16
|
@mail = config['mail']
|
14
17
|
@password = config['password']
|
15
18
|
end
|
19
|
+
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def load_yaml_file(config_file)
|
24
|
+
require 'yaml'
|
25
|
+
YAML.load_file(config_file)
|
26
|
+
end
|
16
27
|
end
|
17
28
|
|
18
29
|
class ConfigurationError < StandardError; end
|
@@ -0,0 +1,388 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require 'mechanize'
|
3
|
+
require 'yasuri'
|
4
|
+
|
5
|
+
module BookmeterScraper
|
6
|
+
class Scraper
|
7
|
+
PROFILE_ATTRIBUTES = %i(
|
8
|
+
name
|
9
|
+
gender
|
10
|
+
age
|
11
|
+
blood_type
|
12
|
+
job
|
13
|
+
address
|
14
|
+
url
|
15
|
+
description
|
16
|
+
first_day
|
17
|
+
elapsed_days
|
18
|
+
read_books_count
|
19
|
+
read_pages_count
|
20
|
+
reviews_count
|
21
|
+
bookshelfs_count
|
22
|
+
)
|
23
|
+
Profile = Struct.new(*PROFILE_ATTRIBUTES)
|
24
|
+
|
25
|
+
JP_ATTRIBUTE_NAMES = {
|
26
|
+
gender: '性別',
|
27
|
+
age: '年齢',
|
28
|
+
blood_type: '血液型',
|
29
|
+
job: '職業',
|
30
|
+
address: '現住所',
|
31
|
+
url: 'URL / ブログ',
|
32
|
+
description: '自己紹介',
|
33
|
+
first_day: '記録初日',
|
34
|
+
elapsed_days: '経過日数',
|
35
|
+
read_books_count: '読んだ本',
|
36
|
+
read_pages_count: '読んだページ',
|
37
|
+
reviews_count: '感想/レビュー',
|
38
|
+
bookshelfs_count: '本棚',
|
39
|
+
}
|
40
|
+
|
41
|
+
BOOK_ATTRIBUTES = %i(name author read_dates uri image_uri)
|
42
|
+
Book = Struct.new(*BOOK_ATTRIBUTES)
|
43
|
+
class Books
|
44
|
+
extend Forwardable
|
45
|
+
|
46
|
+
def_delegator :@books, :[]
|
47
|
+
def_delegator :@books, :[]=
|
48
|
+
def_delegator :@books, :<<
|
49
|
+
def_delegator :@books, :each
|
50
|
+
def_delegator :@books, :flatten!
|
51
|
+
def_delegator :@books, :empty?
|
52
|
+
|
53
|
+
def initialize; @books = []; end
|
54
|
+
|
55
|
+
def concat(books)
|
56
|
+
books.each do |book|
|
57
|
+
next if @books.any? { |b| b.name == book.name && b.author == book.author }
|
58
|
+
@books << book
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def to_a; @books; end
|
63
|
+
end
|
64
|
+
|
65
|
+
USER_ATTRIBUTES = %i(name id uri)
|
66
|
+
User = Struct.new(*USER_ATTRIBUTES)
|
67
|
+
|
68
|
+
NUM_BOOKS_PER_PAGE = 40
|
69
|
+
NUM_USERS_PER_PAGE = 20
|
70
|
+
|
71
|
+
attr_accessor :agent
|
72
|
+
|
73
|
+
|
74
|
+
def initialize(agent = nil)
|
75
|
+
@agent = agent
|
76
|
+
@book_pages = {}
|
77
|
+
end
|
78
|
+
|
79
|
+
def fetch_profile(user_id, agent = @agent)
|
80
|
+
raise ArgumentError unless user_id =~ USER_ID_REGEX
|
81
|
+
raise ScraperError if agent.nil?
|
82
|
+
|
83
|
+
Profile.new(*scrape_profile(user_id, agent))
|
84
|
+
end
|
85
|
+
|
86
|
+
# Scrapes the raw profile values of a user from their "mypage".
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [Object] agent whose #get fetches the page
# @return [Array] one value per entry of PROFILE_ATTRIBUTES, in that order;
#   nil where the page has no matching label
# @raise [ArgumentError] if user_id does not match USER_ID_REGEX
# @raise [ScraperError] if agent is nil
def scrape_profile(user_id, agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?

  mypage = agent.get(BookmeterScraper.mypage_uri(user_id))

  # Every <dl> carries the Japanese label in its first child and the value in
  # its second child.
  label_value_pairs = mypage.search('#side_left > div.inner > div.profile > dl').map do |dl|
    [dl.children[0].children.text, dl.children[1].children.text]
  end
  value_by_label = label_value_pairs.to_h

  values = PROFILE_ATTRIBUTES.map { |attribute| value_by_label[JP_ATTRIBUTE_NAMES[attribute]] }
  # The first slot has no label on the page; it is taken from the sidebar
  # heading instead (presumably the user name -- confirm against PROFILE_ATTRIBUTES).
  values[0] = mypage.at_css('#side_left > div.inner > h3').text

  values
end
|
104
|
+
|
105
|
+
# Fetches all books listed for a user on one kind of book list.
#
# @param user_id [String] must match USER_ID_REGEX
# @param uri_method [Symbol] name of the BookmeterScraper module method that
#   builds the list URI for a user id
# @param agent [Object] agent used for the requests (defaults to @agent)
# @return [Books, Array] a Books collection, or an empty Array when the agent
#   is not logged in
# @raise [ArgumentError] if user_id or uri_method is invalid
# @raise [ScraperError] if agent is nil
def fetch_books(user_id, uri_method, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  books = Books.new
  # Forward the agent that was just validated; without it the page scrape
  # silently fell back to @agent and ignored the caller's agent.
  scrape_books_pages(user_id, uri_method, agent).each do |page|
    extract_books(page).each { |book| books << book }
  end
  books
end
|
119
|
+
|
120
|
+
# Scrapes the raw book-list pages (names and links per slot) for a user.
#
# @param user_id [String] must match USER_ID_REGEX
# @param uri_method [Symbol] name of the BookmeterScraper module method that
#   builds the list URI for a user id
# @param agent [Object] agent used for the requests (defaults to @agent)
# @return [Array] one scraped result per page; empty when the agent is not
#   logged in or the list shows no books
# @raise [ArgumentError] if user_id or uri_method is invalid
# @raise [ScraperError] if agent is nil
def scrape_books_pages(user_id, uri_method, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  first_page = agent.get(BookmeterScraper.method(uri_method).call(user_id))

  # if books are not found at all
  return [] if first_page.search('#main_left > div > center > a').empty?

  # A "now_page" marker is only present when the list spans several pages.
  paginated = !first_page.search('span.now_page').empty?

  tree =
    if paginated
      Yasuri.pages_root('//span[@class="now_page"]/following-sibling::span[1]/a') do
        text_page_index('//span[@class="now_page"]/a')
        (1..NUM_BOOKS_PER_PAGE).each do |slot|
          send("text_book_#{slot}_name", %(//*[@id="main_left"]/div/div[#{slot + 1}]/div[2]/a))
          send("text_book_#{slot}_link", %(//*[@id="main_left"]/div/div[#{slot + 1}]/div[2]/a/@href))
        end
      end
    else
      Yasuri.struct_books('//*[@id="main_left"]/div') do
        (1..NUM_BOOKS_PER_PAGE).each do |slot|
          send("text_book_#{slot}_name", %(//*[@id="main_left"]/div/div[#{slot + 1}]/div[2]/a))
          send("text_book_#{slot}_link", %(//*[@id="main_left"]/div/div[#{slot + 1}]/div[2]/a/@href))
        end
      end
    end

  scraped = tree.inject(agent, first_page)
  # The single-page result is wrapped so callers always iterate over pages.
  paginated ? scraped : [scraped]
end
|
150
|
+
|
151
|
+
# Turns one scraped book-list page into Book objects.
#
# Slots are read in order and reading stops at the first slot whose link is
# empty. Each book's read_dates holds the first read date (when one was
# scraped) followed by every re-read date.
#
# @param page [#[]] scraped page keyed by "book_<n>_link"
# @return [Array<Book>]
# @raise [ArgumentError] if page is nil
def extract_books(page)
  raise ArgumentError if page.nil?

  (1..NUM_BOOKS_PER_PAGE).each_with_object([]) do |slot, collected|
    path = page["book_#{slot}_link"]
    break collected if path.empty?

    first_read = scrape_read_date(path)
    dates = []
    dates << Time.local(first_read['year'], first_read['month'], first_read['day']) unless first_read.empty?

    # scrape_reread_date may hand back a list; flatten it into individual dates.
    [scrape_reread_date(path)].flatten.each do |again|
      dates << Time.local(again['reread_year'], again['reread_month'], again['reread_day'])
    end

    name = scrape_book_name(path)
    author = scrape_book_author(path)
    image_uri = scrape_book_image_uri(path)
    collected << Book.new(name, author, dates, ROOT_URI + path, image_uri)
  end
end
|
188
|
+
|
189
|
+
# Collects the books a user read in one particular month.
#
# Each page's first and last book dates are reduced to year/month and compared
# with the target. NOTE(review): the skip/stop logic only makes sense if pages
# and books are ordered newest first -- confirm against the site.
#
# @param user_id [String] must match USER_ID_REGEX
# @param target_year_month [Time] month to look for (compared with
#   Time.local(year, month) values)
# @return [Books]
# @raise [ArgumentError] if user_id is invalid or target_year_month is nil
def fetch_read_books(user_id, target_year_month)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError if target_year_month.nil?

  matched = Books.new
  scrape_books_pages(user_id, :read_books_uri).each do |page|
    newest = scrape_read_date(page['book_1_link'])
    oldest = get_last_book_date(page)

    newest_month = Time.local(newest['year'].to_i, newest['month'].to_i)
    oldest_month = Time.local(oldest['year'].to_i, oldest['month'].to_i)

    # Whole page is later than the target month: keep paging.
    next if target_year_month < oldest_month
    # Whole page is earlier than the target month: nothing further can match.
    break if target_year_month > newest_month

    matched.concat(fetch_target_books(target_year_month, page))

    # The page reaches past the target month, so later pages cannot match.
    # When the last book is still in the target month the next page may
    # continue it, so keep going.
    break if target_year_month > oldest_month
  end
  matched
end
|
217
|
+
|
218
|
+
# Returns the read date of the last book present on a scraped page.
#
# Slots are scanned from the highest number downwards and the first one with
# a non-empty link wins.
#
# @param page [#[]] scraped page keyed by "book_<n>_link"
# @return [Object, nil] whatever scrape_read_date returns for that book, or
#   nil when no slot holds a link
# @raise [ArgumentError] if page is nil
def get_last_book_date(page)
  raise ArgumentError if page.nil?

  NUM_BOOKS_PER_PAGE.downto(1) do |i|
    link = page["book_#{i}_link"]
    return scrape_read_date(link) unless link.empty?
  end

  # No slot held a link. Return nil explicitly; otherwise the Integer that
  # #downto evaluates to would leak out as a bogus "date".
  nil
end
|
227
|
+
|
228
|
+
# Picks the books on one scraped page that were read or re-read in the target month.
#
# Slots with an empty link are skipped (scanning continues past them). A
# matching book's read_dates holds its first read date followed by all
# re-read dates.
#
# @param target_year_month [Time] month to match, compared against
#   Time.local(year, month) values
# @param page [#[]] scraped page keyed by "book_<n>_link"
# @return [Books]
# @raise [ArgumentError] if either argument is nil
def fetch_target_books(target_year_month, page)
  raise ArgumentError if target_year_month.nil?
  raise ArgumentError if page.nil?

  (1..NUM_BOOKS_PER_PAGE).each_with_object(Books.new) do |slot, matched|
    path = page["book_#{slot}_link"]
    next if path.empty?

    first_read = scrape_read_date(path)
    days = [Time.local(first_read['year'], first_read['month'], first_read['day'])]
    months = [Time.local(first_read['year'], first_read['month'])]

    # scrape_reread_date may hand back a list; flatten it into individual dates.
    rereads = [scrape_reread_date(path)].flatten
    rereads.each { |again| months << Time.local(again['reread_year'], again['reread_month']) }

    next unless months.include?(target_year_month)

    rereads.each do |again|
      days << Time.local(again['reread_year'], again['reread_month'], again['reread_day'])
    end

    name = scrape_book_name(path)
    author = scrape_book_author(path)
    image_uri = scrape_book_image_uri(path)
    matched << Book.new(name, author, days, ROOT_URI + path, image_uri)
  end
end
|
267
|
+
|
268
|
+
# Returns the page for a book, fetching it at most once per scraper.
#
# @param book_uri [String] book path; ROOT_URI is prepended for the request
# @param agent [Object] agent whose #get fetches the page (defaults to @agent)
# @return [Object] the page returned by the agent, served from @book_pages on
#   later calls with the same path
def get_book_page(book_uri, agent = @agent)
  @book_pages[book_uri] ||= agent.get(ROOT_URI + book_uri)
end
|
272
|
+
|
273
|
+
# Reads a book's title from its page.
#
# @param book_uri [String] book path handed to get_book_page
# @return [String] text of the element(s) matching '#title'
def scrape_book_name(book_uri)
  book_page = get_book_page(book_uri)
  book_page.search('#title').text
end
|
276
|
+
|
277
|
+
# Reads a book's author from its page.
#
# @param book_uri [String] book path handed to get_book_page
# @return [String] text of the element(s) matching '#author_name'
def scrape_book_author(book_uri)
  book_page = get_book_page(book_uri)
  book_page.search('#author_name').text
end
|
280
|
+
|
281
|
+
# Reads a book's cover image URI from its page.
#
# @param book_uri [String] book path handed to get_book_page
# @return [String] text of the src attribute of the element with id "book_image"
def scrape_book_image_uri(book_uri)
  book_page = get_book_page(book_uri)
  book_page.search('//*[@id="book_image"]/@src').text
end
|
284
|
+
|
285
|
+
# Scrapes the (first) read date of a book from its page.
#
# The year, month and day are each taken from the first option of the
# corresponding select box, reduced to their digits and converted with to_i.
#
# @param book_uri [String] book path handed to get_book_page
# @param agent [Object] agent passed to the Yasuri tree (defaults to @agent)
# @return [Object] result of injecting the page into the Yasuri tree; callers
#   read 'year', 'month' and 'day' from it
def scrape_read_date(book_uri, agent = @agent)
  date_tree = Yasuri.struct_date('//*[@id="book_edit_area"]/form[1]/div[2]') do
    text_year('//*[@id="read_date_y"]/option[1]', truncate: /\d+/, proc: :to_i)
    text_month('//*[@id="read_date_m"]/option[1]', truncate: /\d+/, proc: :to_i)
    text_day('//*[@id="read_date_d"]/option[1]', truncate: /\d+/, proc: :to_i)
  end
  book_page = get_book_page(book_uri)
  date_tree.inject(agent, book_page)
end
|
293
|
+
|
294
|
+
# Scrapes the re-read dates of a book from its page.
#
# Year, month and day are each taken from the first option of the first,
# second and third select box inside a "reread_box", reduced to their digits
# and converted with to_i.
#
# @param book_uri [String] book path handed to get_book_page
# @param agent [Object] agent passed to the Yasuri tree (defaults to @agent)
# @return [Object] result of injecting the page into the Yasuri tree; callers
#   flatten it and read 'reread_year', 'reread_month' and 'reread_day'
def scrape_reread_date(book_uri, agent = @agent)
  reread_tree = Yasuri.struct_reread_date('//*[@id="book_edit_area"]/div/form[1]/div[2]') do
    text_reread_year('//div[@class="reread_box"]/form[1]/div[2]/select[1]/option[1]', truncate: /\d+/, proc: :to_i)
    text_reread_month('//div[@class="reread_box"]/form[1]/div[2]/select[2]/option[1]', truncate: /\d+/, proc: :to_i)
    text_reread_day('//div[@class="reread_box"]/form[1]/div[2]/select[3]/option[1]', truncate: /\d+/, proc: :to_i)
  end
  book_page = get_book_page(book_uri)
  reread_tree.inject(agent, book_page)
end
|
302
|
+
|
303
|
+
# Fetches the users that a user follows.
#
# The logged-in user's own list is scraped with scrape_followings_page; any
# other user's list uses the generic user-listing scrape.
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [Object] agent used for the requests (defaults to @agent)
# @return [Array<User>] empty when the agent is not logged in
# @raise [ArgumentError] if user_id does not match USER_ID_REGEX
# @raise [ScraperError] if agent is nil
def fetch_followings(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  # Hand the validated agent on to the page scrape; previously it was dropped
  # and the request was made with @agent regardless of the caller's agent.
  scraped_pages =
    if user_id == agent.log_in_user_id
      scrape_followings_page(user_id, agent)
    else
      scrape_users_listing_page(user_id, :followings_uri, agent)
    end

  scraped_pages.flat_map { |page| extract_users(page) }
end
|
317
|
+
|
318
|
+
# Fetches the users that follow a user.
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [Object] agent used for the requests (defaults to @agent)
# @return [Array<User>] empty when the agent is not logged in
# @raise [ArgumentError] if user_id does not match USER_ID_REGEX
# @raise [ScraperError] if agent is nil
def fetch_followers(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ScraperError if agent.nil?
  return [] unless agent.logged_in?

  # Go straight to the generic listing scrape so the agent validated above is
  # the one used for the request; previously it was dropped in favour of @agent.
  scraped_pages = scrape_users_listing_page(user_id, :followers_uri, agent)
  scraped_pages.flat_map { |page| extract_users(page) }
end
|
331
|
+
|
332
|
+
# Scrapes the followings page of a user into name/link pairs per slot.
# fetch_followings uses this for the logged-in user's own list.
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [Object] agent used for the request (defaults to @agent)
# @return [Array] a one-element array holding the scraped page, or an empty
#   array when the agent is not logged in
# @raise [ArgumentError] if user_id does not match USER_ID_REGEX
def scrape_followings_page(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  return [] unless agent.logged_in?

  listing = agent.get(BookmeterScraper.followings_uri(user_id))
  tree = Yasuri.struct_books('//*[@id="main_left"]/div') do
    (1..NUM_USERS_PER_PAGE).each do |slot|
      send("text_user_#{slot}_name", %(//*[@id="main_left"]/div/div[#{slot}]/a/@title))
      send("text_user_#{slot}_link", %(//*[@id="main_left"]/div/div[#{slot}]/a/@href))
    end
  end
  [tree.inject(agent, listing)]
end
|
345
|
+
|
346
|
+
# Scrapes the followings page of a user other than the logged-in one.
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [Object] agent used for the request; optional and defaulting
#   to @agent, so existing one-argument calls behave as before
# @return [Array] whatever scrape_users_listing_page returns for :followings_uri
# @raise [ArgumentError] if user_id does not match USER_ID_REGEX
def scrape_others_followings_page(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX

  scrape_users_listing_page(user_id, :followings_uri, agent)
end
|
350
|
+
|
351
|
+
# Scrapes the followers page of a user.
#
# @param user_id [String] must match USER_ID_REGEX
# @param agent [Object] agent used for the request; optional and defaulting
#   to @agent, so existing one-argument calls behave as before
# @return [Array] whatever scrape_users_listing_page returns for :followers_uri
# @raise [ArgumentError] if user_id does not match USER_ID_REGEX
def scrape_followers_page(user_id, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX

  scrape_users_listing_page(user_id, :followers_uri, agent)
end
|
355
|
+
|
356
|
+
# Scrapes a generic user-listing page into name/link pairs per slot.
#
# @param user_id [String] must match USER_ID_REGEX
# @param uri_method [Symbol] name of the BookmeterScraper module method that
#   builds the listing URI for a user id
# @param agent [Object] agent used for the request (defaults to @agent)
# @return [Array] a one-element array holding the scraped page, or an empty
#   array when the agent is not logged in
# @raise [ArgumentError] if user_id or uri_method is invalid
def scrape_users_listing_page(user_id, uri_method, agent = @agent)
  raise ArgumentError unless user_id =~ USER_ID_REGEX
  raise ArgumentError unless BookmeterScraper.methods.include?(uri_method)
  return [] unless agent.logged_in?

  listing = agent.get(BookmeterScraper.method(uri_method).call(user_id))
  tree = Yasuri.struct_users('//*[@id="main_left"]/div') do
    (1..NUM_USERS_PER_PAGE).each do |slot|
      send("text_user_#{slot}_name", %(//*[@id="main_left"]/div/div[#{slot}]/div/div[2]/a/@title))
      send("text_user_#{slot}_link", %(//*[@id="main_left"]/div/div[#{slot}]/div/div[2]/a/@href))
    end
  end
  [tree.inject(agent, listing)]
end
|
370
|
+
|
371
|
+
# Turns one scraped user-listing page into User objects.
#
# Slots are read in order and reading stops at the first slot whose name is
# empty. The user id is the run of digits after "/u/" at the end of the
# slot's link.
#
# @param page [#[]] scraped page keyed by "user_<n>_name" / "user_<n>_link"
# @return [Array<User>]
# @raise [ArgumentError] if page is nil
def extract_users(page)
  raise ArgumentError if page.nil?

  (1..NUM_USERS_PER_PAGE).each_with_object([]) do |slot, found|
    name = page["user_#{slot}_name"]
    break found if name.empty?

    id = page["user_#{slot}_link"].match(/\/u\/(\d+)$/)[1]
    found << User.new(name, id, ROOT_URI + "/u/#{id}")
  end
end
|
385
|
+
end
|
386
|
+
|
387
|
+
# Raised by Scraper methods when no agent is available for a request.
class ScraperError < StandardError; end
|
388
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bookmeter_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kohei Yamamoto
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -115,8 +115,10 @@ files:
|
|
115
115
|
- bookmeter_scraper.gemspec
|
116
116
|
- exe/bookmeter_scraper
|
117
117
|
- lib/bookmeter_scraper.rb
|
118
|
+
- lib/bookmeter_scraper/agent.rb
|
118
119
|
- lib/bookmeter_scraper/bookmeter.rb
|
119
120
|
- lib/bookmeter_scraper/configuration.rb
|
121
|
+
- lib/bookmeter_scraper/scraper.rb
|
120
122
|
- lib/bookmeter_scraper/version.rb
|
121
123
|
homepage: https://github.com/kymmt90/bookmeter_scraper
|
122
124
|
licenses:
|
@@ -138,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
138
140
|
version: '0'
|
139
141
|
requirements: []
|
140
142
|
rubyforge_project:
|
141
|
-
rubygems_version: 2.
|
143
|
+
rubygems_version: 2.5.1
|
142
144
|
signing_key:
|
143
145
|
specification_version: 4
|
144
146
|
summary: Bookmeter scraping library
|