taaze 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +80 -0
- data/.travis.yml +8 -0
- data/Gemfile +11 -0
- data/LICENSE +22 -0
- data/README.md +56 -0
- data/Rakefile +8 -0
- data/example.jpg +0 -0
- data/lib/taaze.rb +3 -0
- data/lib/taaze/collections.rb +56 -0
- data/lib/taaze/comments.rb +120 -0
- data/lib/taaze/tags.rb +38 -0
- data/lib/taaze/version.rb +5 -0
- data/spec/collections_spec.rb +32 -0
- data/spec/comments_spec.rb +32 -0
- data/spec/fixures/collections.yml +95 -0
- data/spec/fixures/comments.yml +47 -0
- data/spec/fixures/tags.yml +16 -0
- data/spec/tags_spec.rb +31 -0
- data/taaze.gemspec +18 -0
- metadata +28 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 503d21509d28f6411ef50ff8b1ffba0811340922
|
4
|
+
data.tar.gz: ecdce9af22a15b483e625ac71f986b3a405619c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5b197a82f8aed1e829b999339127b49f142d16c60b284d86bed60052333bae8d9879ec237e0a98f94d1772605ecdbab1cf9abb21adae84f845b62925751e3158
|
7
|
+
data.tar.gz: e3845845ca6638c0e512f285a2d4c0d02d5d40e770a0939f05fa52a55e4051acb4fb6a4caae352b4c075e4853cd411036a30694875e05780b2a6e51a777c258d
|
data/.gitignore
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
# Created by https://www.gitignore.io/api/ruby,osx,vim
|
2
|
+
|
3
|
+
### Ruby ###
|
4
|
+
*.gem
|
5
|
+
*.rbc
|
6
|
+
/.config
|
7
|
+
/coverage/
|
8
|
+
/InstalledFiles
|
9
|
+
/pkg/
|
10
|
+
/spec/reports/
|
11
|
+
/spec/examples.txt
|
12
|
+
/test/tmp/
|
13
|
+
/test/version_tmp/
|
14
|
+
/tmp/
|
15
|
+
Gemfile.lock
|
16
|
+
|
17
|
+
## Specific to RubyMotion:
|
18
|
+
.dat*
|
19
|
+
.repl_history
|
20
|
+
build/
|
21
|
+
|
22
|
+
## VCR
|
23
|
+
/spec/fixures/vcr_cassettes
|
24
|
+
|
25
|
+
## Documentation cache and generated files:
|
26
|
+
/.yardoc/
|
27
|
+
/_yardoc/
|
28
|
+
/doc/
|
29
|
+
/rdoc/
|
30
|
+
|
31
|
+
## Environment normalisation:
|
32
|
+
/.bundle/
|
33
|
+
/vendor/bundle
|
34
|
+
/lib/bundler/man/
|
35
|
+
|
36
|
+
# for a library or gem, you might want to ignore these files since the code is
|
37
|
+
# intended to run in multiple environments; otherwise, check them in:
|
38
|
+
# Gemfile.lock
|
39
|
+
# .ruby-version
|
40
|
+
# .ruby-gemset
|
41
|
+
|
42
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
43
|
+
.rvmrc
|
44
|
+
|
45
|
+
|
46
|
+
### OSX ###
|
47
|
+
.DS_Store
|
48
|
+
.AppleDouble
|
49
|
+
.LSOverride
|
50
|
+
|
51
|
+
# Icon must end with two \r
|
52
|
+
Icon
|
53
|
+
|
54
|
+
|
55
|
+
# Thumbnails
|
56
|
+
._*
|
57
|
+
|
58
|
+
# Files that might appear in the root of a volume
|
59
|
+
.DocumentRevisions-V100
|
60
|
+
.fseventsd
|
61
|
+
.Spotlight-V100
|
62
|
+
.TemporaryItems
|
63
|
+
.Trashes
|
64
|
+
.VolumeIcon.icns
|
65
|
+
|
66
|
+
# Directories potentially created on remote AFP share
|
67
|
+
.AppleDB
|
68
|
+
.AppleDesktop
|
69
|
+
Network Trash Folder
|
70
|
+
Temporary Items
|
71
|
+
.apdisk
|
72
|
+
|
73
|
+
|
74
|
+
### Vim ###
|
75
|
+
[._]*.s[a-w][a-z]
|
76
|
+
[._]s[a-w][a-z]
|
77
|
+
*.un~
|
78
|
+
Session.vim
|
79
|
+
.netrwhist
|
80
|
+
*~
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 BUEZE
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# taaze [![Build Status](https://travis-ci.org/BUEZE/taaze.svg?branch=master)](https://travis-ci.org/BUEZE/taaze)
|
2
|
+
|
3
|
+
---
|
4
|
+
Taaze is a lightweight package written in Ruby. It makes it easy to get comments or collections from the online bookstore [Taaze | 讀冊生活](http://www.taaze.tw/index.html).
|
5
|
+
|
6
|
+
### How To Get Started
|
7
|
+
Install by Ruby gem [Taaze](https://rubygems.org/gems/taaze)
|
8
|
+
|
9
|
+
`gem install taaze`
|
10
|
+
|
11
|
+
|
12
|
+
### Requirements
|
13
|
+
- [Nokogiri](http://nokogiri.org) - for parsing HTML
|
14
|
+
- [JSON](http://ruby-doc.org/stdlib-2.0.0/libdoc/json/rdoc/JSON.html) - for parsing JSON data
|
15
|
+
|
16
|
+
`$ bundle install`
|
17
|
+
|
18
|
+
### Usage
|
19
|
+
This gem may be used as a command line utility or called from code
|
20
|
+
|
21
|
+
#### How to get ID
|
22
|
+
##### User's comment
|
23
|
+
- You can get user list at [冊格子](http://www.taaze.tw/zekea_index.html)
|
24
|
+
- On every user's personal page, you can get his or her ID from the URL
|
25
|
+
- For example:
|
26
|
+
- http://www.taaze.tw/container_zekeaclt_view.html?ci=13313301
|
27
|
+
- The value after 'ci=' is the user ID
|
28
|
+
- Like the user ID, you can also get any book's ID from its URL
|
29
|
+
- There are 3 options you can use:
|
30
|
+
- --comments : get all comments for the user
|
31
|
+
- --collections : get all book collections for the user
|
32
|
+
- --tags : get all tags of the book
|
33
|
+
- Then add the path of the YAML file where you want to store the result
|
34
|
+
- --output [file_path]
|
35
|
+
- Finally, Taaze will return a YAML-format file
|
36
|
+
|
37
|
+
### CLI
|
38
|
+
|
39
|
+
`taaze --comments [user_id] --output [file_path]` OR
|
40
|
+
|
41
|
+
`taaze --collections [user_id] --output [file_path]` OR
|
42
|
+
|
43
|
+
`taaze --tags [book_id] --output [file_path]`
|
44
|
+
|
45
|
+
### For Example
|
46
|
+
|
47
|
+
`taaze --comments 12522728 --output ./comments.yml`
|
48
|
+
|
49
|
+
`taaze --collections 12522728 --output ./collections.yml`
|
50
|
+
|
51
|
+
`taaze --tags 11100763435 --output ./tags.yml`
|
52
|
+
|
53
|
+
|
54
|
+
### Output Sample
|
55
|
+
|
56
|
+
![example.jpg](example.jpg)
|
data/Rakefile
ADDED
data/example.jpg
ADDED
Binary file
|
data/lib/taaze.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
module Taaze
  # Retrieves the book collection of a Taaze user from the site's JSON
  # endpoint (API_URL) and exposes it as an array of hashes.
  #
  # A user's personal page looks like
  #   http://www.taaze.tw/container_zekeaclt_view.html?ci=12522728
  # and the collection tab of that user like
  #   http://www.taaze.tw/container_zekeaclt_view.html?ci=12522728&cp=1
  #
  # Sample result of #collections:
  #   [
  #     {"title"=>"<book title>", "book_url"=>"http://www.taaze.tw/sing.html?pid=11100635522"},
  #     {"title"=>"<book title>", "book_url"=>"http://www.taaze.tw/sing.html?pid=11100210251"},
  #     ...
  #   ]
  class TaazeCollections
    USERS_URL = 'http://www.taaze.tw/container_zekeaclt_view.html?ci='
    API_URL = 'http://www.taaze.tw/beta/zekeacltBooksDataAgent.jsp?custId='
    BOOK_URL = 'http://www.taaze.tw/sing.html?pid='

    # user_id - the "ci" value of the user's page URL. Strings and Integers
    #           are both accepted; the id is converted with #to_s.
    #
    # Performs the HTTP requests immediately (see #parse_html).
    def initialize(user_id)
      parse_html(user_id)
    end

    # Returns an Array of Hashes with the keys 'title' and 'book_url',
    # one per collected book. The result is memoized.
    def collections
      @collections ||= extract_books
    end

    private

    # Queries the endpoint twice: the first response is only used for its
    # 'totalsize' field, the second asks for records 0..totalsize and its
    # 'result1' field is stored in @doc.
    def parse_html(user_id)
      base = API_URL + user_id.to_s
      totalsize = fetch_json(base)['totalsize']
      @doc = fetch_json("#{base}&startNum=0&endNum=#{totalsize}")['result1']
    end

    # Downloads +url+ and parses the body as JSON.
    # URI#read (provided by open-uri) is used instead of Kernel#open, which
    # no longer opens URLs on Ruby 3.0 and later.
    def fetch_json(url)
      JSON.parse(URI.parse(url).read)
    end

    # Converts the raw records in @doc into the public format.
    # A missing 'result1' (nil @doc) yields an empty list.
    def extract_books
      Array(@doc).map do |book_data|
        {
          'title' => book_data['titleMain'],
          'book_url' => BOOK_URL + book_data['prodId']
        }
      end
    end
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'date'
|
4
|
+
require 'json'
|
5
|
+
require 'net/http'
|
6
|
+
|
7
|
+
# Module Taaze
|
8
|
+
module Taaze
  # Retrieves every book comment written by a Taaze user from the site's
  # JSON endpoint (API_URL) and exposes it as an array of hashes.
  #
  # A user's personal page looks like
  #   http://www.taaze.tw/container_zekeaclt_view.html?ci=12522728
  # and the comment tab of that user like
  #   http://www.taaze.tw/container_zekeaclt_view.html?ci=12522728&cp=2
  class TaazeComments
    USERS_URL = 'http://www.taaze.tw/container_zekeaclt_view.html?ci='
    MAIN_URL = 'http://www.taaze.tw/container_zekeaclt_view.html?'
    API_URL = 'http://www.taaze.tw/beta/zekeaCommentDataAgent.jsp?custId='
    BOOK_URL = 'http://www.taaze.tw/sing.html?pid='

    # user_id - the "ci" value of the user's page URL (String or Integer).
    #
    # The id is always converted to a String before use. Performs the HTTP
    # requests immediately (see #parse_html).
    def initialize(user_id)
      parse_html(user_id.to_s)
    end

    # Returns an Array of Hashes with the keys 'title', 'comment',
    # 'book_url' and 'comment_url', one per comment.
    def comments
      @comments_found
    end

    private

    # Returns true when +obj+, converted to a String, looks like an
    # optionally signed integer or decimal number, false otherwise.
    def numeric?(obj)
      !obj.to_s.match(/\A[+-]?\d+?(\.\d+)?\Z/).nil?
    end

    # Returns the query parameters of +url+ as an Array of "key=value"
    # strings: takes the text after the last '/', then the text after the
    # last '?', and splits that on '&'.
    def split_url(url)
      query = split_get_last(split_get_last(url, '/'), '?')
      split_get_last(query, '&', -1)
    end

    # Splits +str_to_split+ on +character+ and picks from the pieces:
    #   index == -1 -> the whole array
    #   index ==  0 -> the last piece
    #   otherwise   -> the piece at array[index]
    def split_get_last(str_to_split, character, index = 0)
      pieces = str_to_split.split(character)
      return pieces if index == -1

      index == 0 ? pieces[pieces.length - 1] : pieces[index]
    end

    # Queries the endpoint twice: the first response is only used for its
    # 'totalsize' field (the number of comments), the second asks for
    # records 0..totalsize; its 'result1' field is handed to
    # #extract_comments.
    def parse_html(user_id)
      # The id is the only variable part of the URL, so it is escaped here;
      # URI.encode on the whole URL is not available on Ruby 3.0 and later.
      base = API_URL + URI.encode_www_form_component(user_id.to_s)
      first_doc = JSON.parse(url_get_html("#{base}&startNum=0&endNum=0"))
      total_num = first_doc['totalsize']

      main_doc = JSON.parse(url_get_html("#{base}&startNum=0&endNum=#{total_num}"))
      extract_comments(main_doc['result1'], user_id)
    end

    # Sends a GET request to +url_str+ (an already escaped URL) and returns
    # the response body.
    def url_get_html(url_str)
      Net::HTTP.get(URI.parse(url_str))
    end

    # Converts the raw comment records into the public format and stores
    # them in @comments_found (only on the first call).
    #
    # content - Array of record Hashes from the endpoint, or nil. The fields
    #           read here are:
    #             'titleMain' -> book title
    #             'content'   -> comment text
    #             'orgProdId' -> book id, e.g. "11100597685"
    #             'pkNo'      -> comment id, e.g. "1000243977"
    # user_id - id of the user; used as the "ci" parameter of the comment
    #           URL, e.g.
    #           http://www.taaze.tw/container_zekeaclt_view.html?co=1000238964&ci=12522728&cp=3
    #           (co -> comment id, ci -> user id)
    #
    # A nil +content+ yields an empty list.
    def extract_comments(content, user_id)
      found = Array(content).map do |item|
        {
          'title' => item['titleMain'],
          'comment' => item['content'],
          'book_url' => BOOK_URL + item['orgProdId'],
          'comment_url' => "#{MAIN_URL}co=#{item['pkNo']}&ci=#{user_id}&cp=3"
        }
      end
      @comments_found ||= found
    end
  end
end
|
data/lib/taaze/tags.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module Taaze
  # Downloads the product page of a book and returns the text of its tags.
  #
  # Sample input (product id): 11100763252
  # Sample output: an Array with one String per tag link on the page.
  class TaazeBooktags
    BOOKS_URL = 'http://www.taaze.tw/sing.html?pid='

    # book_id - the "pid" value of the book's page URL. Strings and Integers
    #           are both accepted; the id is converted with #to_s.
    #
    # Downloads and parses the page immediately (see #parse_html).
    def initialize(book_id)
      parse_html(book_id)
    end

    # Returns an Array of tag Strings for the book. The result is memoized.
    def tags
      @tags ||= extract_tags
    end

    private

    # Fetches the book page and stores the parsed HTML in @document.
    # URI#open (provided by open-uri) returns the same IO object that
    # Kernel#open(url) used to return; Kernel#open itself no longer opens
    # URLs on Ruby 3.0 and later.
    def parse_html(book_id)
      url = BOOKS_URL + book_id.to_s
      @document = Nokogiri::HTML(URI.parse(url).open)
    end

    # Collects the text of every <a class="tag"> element of the page.
    def extract_tags
      @document.xpath('//a[@class="tag"]').map { |t| t.text }
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'vcr'
|
4
|
+
require 'webmock/minitest'
|
5
|
+
require_relative '../lib/taaze.rb'
|
6
|
+
|
7
|
+
# Ids of the users whose collections are compared with the YAML fixture.
USER_ID = %w(12522728 13193872)
expected_by_user = YAML.load(File.read('./spec/fixures/collections.yml'))

VCR.configure do |vcr|
  vcr.cassette_library_dir = 'spec/fixures/vcr_cassettes'
  vcr.hook_into :webmock
end

# All HTTP traffic of the specs below goes through the 'collections' cassette.
VCR.use_cassette('collections') do
  USER_ID.each do |user_id|
    actual_books = Taaze::TaazeCollections.new(user_id).collections

    describe "Get all the collections with specific user's id" do
      it 'has the right number of collections' do
        actual_books.size.must_equal expected_by_user[user_id].size
      end

      # One example per fetched book: it must be listed in the fixture.
      actual_books.each do |book|
        it 'finds book in the test collection' do
          listed = expected_by_user[user_id].include?(book)
          [book, listed].must_equal [book, true]
        end
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'vcr'
|
4
|
+
require 'webmock/minitest'
|
5
|
+
require_relative '../lib/taaze.rb'
|
6
|
+
|
7
|
+
# Ids of the users whose comments are compared with the YAML fixture.
USER_ID = %w(12522728 13193872)
expected_by_user = YAML.load(File.read('./spec/fixures/comments.yml'))

VCR.configure do |vcr|
  vcr.cassette_library_dir = 'spec/fixures/vcr_cassettes'
  vcr.hook_into :webmock
end

# All HTTP traffic of the specs below goes through the 'comments' cassette.
VCR.use_cassette('comments') do
  USER_ID.each do |user_id|
    actual_comments = Taaze::TaazeComments.new(user_id).comments

    describe "Get all the comments with specific user's id" do
      it 'has the right number of comments' do
        actual_comments.size.must_equal expected_by_user[user_id].size
      end

      # One example per fetched comment: it must be listed in the fixture.
      actual_comments.each do |comment|
        it 'finds comments in the test comments' do
          listed = expected_by_user[user_id].include?(comment)
          [comment, listed].must_equal [comment, true]
        end
      end
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
---
|
2
|
+
'12522728':
|
3
|
+
- title: '村上收音機'
|
4
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100635522'
|
5
|
+
- title: '尋找漩渦貓的方法'
|
6
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100210251'
|
7
|
+
- title: '張愛玲的世界'
|
8
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100030671'
|
9
|
+
- title: '拜訪糖果阿姨'
|
10
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100653796'
|
11
|
+
- title: 'A夢'
|
12
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100732992'
|
13
|
+
- title: '河流'
|
14
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100683568'
|
15
|
+
- title: '紅樓夢校注彩畫本(1-3冊合售)'
|
16
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100161589'
|
17
|
+
- title: '傾城之戀【張愛玲典藏新版】'
|
18
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100228548'
|
19
|
+
- title: '金烏'
|
20
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100677389'
|
21
|
+
- title: '臺灣成長小說選(增訂版)'
|
22
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100649825'
|
23
|
+
- title: '靈魂的領地:國民散文讀本 '
|
24
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100653909'
|
25
|
+
- title: '懸崖上的花園:太平洋戰爭時期上海文學場域(1942-1945)'
|
26
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100672028'
|
27
|
+
- title: '九重葛與美少年'
|
28
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100657709'
|
29
|
+
- title: '春雪:豐饒之海四部曲(1)'
|
30
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100034168'
|
31
|
+
- title: '魯迅小說集'
|
32
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100067679'
|
33
|
+
- title: '波赫士(1-4冊合售合售)'
|
34
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100125593'
|
35
|
+
- title: '奇萊前書'
|
36
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100067730'
|
37
|
+
- title: '半生緣【張愛玲典藏新版】'
|
38
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100192438'
|
39
|
+
- title: '瓊美卡隨想錄(散文)'
|
40
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100616297'
|
41
|
+
- title: '犄角'
|
42
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100617464'
|
43
|
+
- title: '疾病的隱喻 '
|
44
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100625913'
|
45
|
+
- title: '古都'
|
46
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100268249'
|
47
|
+
- title: '觀看的方式'
|
48
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100238376'
|
49
|
+
- title: '臺灣成長小說選'
|
50
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100035125'
|
51
|
+
- title: '晨讀10分鐘:青春無敵早點詩:中學生新詩選'
|
52
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100631660'
|
53
|
+
- title: '屏息的文明'
|
54
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100034203'
|
55
|
+
- title: '雲和'
|
56
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100034334'
|
57
|
+
- title: '海風野火花'
|
58
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100249859'
|
59
|
+
- title: '你的聲音充滿時間'
|
60
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100276696'
|
61
|
+
- title: '離與苦:戰爭的延續'
|
62
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100254818'
|
63
|
+
- title: '少女維特'
|
64
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100241234'
|
65
|
+
- title: '搜索者'
|
66
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100067568'
|
67
|
+
- title: '浮生草'
|
68
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100591888'
|
69
|
+
- title: '溫莎墓園日記(小說)'
|
70
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100617237'
|
71
|
+
- title: '漫長的告別'
|
72
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100210749'
|
73
|
+
- title: '雙月記'
|
74
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100236244'
|
75
|
+
- title: '愛情的盡頭'
|
76
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100210189'
|
77
|
+
- title: '我們在此相遇'
|
78
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100154779'
|
79
|
+
- title: '月球姓氏(聯合文學經典版)'
|
80
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100220368'
|
81
|
+
- title: '小團圓'
|
82
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100192439'
|
83
|
+
- title: '瑪德蓮'
|
84
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100596747'
|
85
|
+
'13193872':
|
86
|
+
- title: '大賣場裡的人類學家:用人文科學搞懂消費者,解決最棘手的商業問題'
|
87
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100707843'
|
88
|
+
- title: '看不見的城市'
|
89
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100175133'
|
90
|
+
- title: '演算法統治世界'
|
91
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100710575'
|
92
|
+
- title: '從0到1:打開世界運作的未知祕密,在意想不到之處發現價值'
|
93
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100720260'
|
94
|
+
- title: '為社會而設計'
|
95
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100713612'
|
@@ -0,0 +1,47 @@
|
|
1
|
+
---
|
2
|
+
'12522728':
|
3
|
+
- title: '張愛玲的世界'
|
4
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100030671'
|
5
|
+
comment: '書中所收,尤其是王禎和〈張愛玲在台灣〉一文,珍貴且真摯。'
|
6
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238964&ci=12522728&cp=3'
|
7
|
+
- title: '小團圓'
|
8
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100192439'
|
9
|
+
comment: '祖奶奶晚期風格,誠實、枯寂又暴烈。'
|
10
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238963&ci=12522728&cp=3'
|
11
|
+
- title: '村上收音機(3):喜歡吃沙拉的獅子'
|
12
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100644311'
|
13
|
+
comment: '第一集到第三集都很好看,雋永且富於生趣的小品,有如爽口的下酒菜!'
|
14
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238962&ci=12522728&cp=3'
|
15
|
+
- title: '村上收音機(2):大蕪菁、難挑的酪梨'
|
16
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100635526'
|
17
|
+
comment: '第一集到第三集都很好看,雋永且富於生趣的小品,有如爽口的下酒菜!'
|
18
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238961&ci=12522728&cp=3'
|
19
|
+
- title: '村上收音機'
|
20
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100635522'
|
21
|
+
comment: '第一集到第三集都很好看,雋永且富於生趣的小品,有如爽口的下酒菜!'
|
22
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238960&ci=12522728&cp=3'
|
23
|
+
- title: '尋找漩渦貓的方法'
|
24
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100210251'
|
25
|
+
comment: '貓控大叔的異國生活誌,充滿了自以為是的趣味感,十分適合自認為有點怪怪的人(咦)。'
|
26
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238959&ci=12522728&cp=3'
|
27
|
+
- title: '拜訪糖果阿姨'
|
28
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100653796'
|
29
|
+
comment: '那些終於灰心或終於原諒的情感角落,發出被記憶篩過的,淡而暖的光。'
|
30
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000224332&ci=12522728&cp=3'
|
31
|
+
- title: 'A夢'
|
32
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100732992'
|
33
|
+
comment: '鮮活聯繫新詩小傳統與當代文化,夭矯複雜如深情的孫悟空。'
|
34
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000224330&ci=12522728&cp=3'
|
35
|
+
- title: '河流'
|
36
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100683568'
|
37
|
+
comment: '城市的女漫遊者,寫邊緣,寫冷淡,寫不被看見或被拋擲的。'
|
38
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000224331&ci=12522728&cp=3'
|
39
|
+
- title: '紅樓夢校注彩畫本(1-3冊合售)'
|
40
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100161589'
|
41
|
+
comment: '此書要什麼有什麼,纏綿悱惻有之,狗皮倒灶有之,詩情畫意有之,醜態怪狀有之,不可能有厭煩的時候。'
|
42
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000213998&ci=12522728&cp=3'
|
43
|
+
- title: '傾城之戀【張愛玲典藏新版】'
|
44
|
+
book_url: 'http://www.taaze.tw/sing.html?pid=11100228548'
|
45
|
+
comment: "張愛玲《傾城之戀》(應是舊版《第一爐香》,現已絕版)。最感官化的小說家,最斑斕的視覺,蒙太奇的敘事,張愛玲的「色彩狂」令人在閱讀時總會想像那電影般的畫面。\n"
|
46
|
+
comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000211000&ci=12522728&cp=3'
|
47
|
+
'13193872': ''
|
data/spec/tags_spec.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'vcr'
|
4
|
+
require 'webmock/minitest'
|
5
|
+
require_relative '../lib/taaze.rb'
|
6
|
+
|
7
|
+
# Ids of the books whose tags are compared with the YAML fixture.
BOOK_ID = %w(11100763435 11100745134 11100714084 11303096752)
expected_by_book = YAML.load(File.read('./spec/fixures/tags.yml'))

VCR.configure do |vcr|
  vcr.cassette_library_dir = 'spec/fixures/vcr_cassettes'
  vcr.hook_into :webmock
end

# All HTTP traffic of the specs below goes through the 'tags' cassette.
VCR.use_cassette('tags') do
  BOOK_ID.each do |book_id|
    actual_tags = Taaze::TaazeBooktags.new(book_id).tags

    describe "Get all the tags with specific book's id" do
      it 'has the right number of tags' do
        actual_tags.size.must_equal expected_by_book[book_id].size
      end

      # One example per fetched tag: it must be listed in the fixture.
      actual_tags.each do |tag|
        it 'finds tags in the test tags' do
          listed = expected_by_book[book_id].include?(tag)
          [tag, listed].must_equal [tag, true]
        end
      end
    end
  end
end
|
data/taaze.gemspec
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
$LOAD_PATH.push File.expand_path('../lib', __FILE__)
|
2
|
+
require 'taaze/version'
|
3
|
+
|
4
|
+
# Gem specification for taaze; version and date are read from the Taaze
# module constants.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'taaze'
  spec.version     = Taaze::VERSION
  spec.date        = Taaze::DATE
  spec.summary     = 'Data extractor of Taaze'
  spec.description = 'Get the book comments or collections of specific user.'
  spec.authors     = ['Bueze']
  spec.email       = ''
  spec.homepage    = 'https://github.com/BUEZE/taaze'
  spec.license     = 'MIT'

  # Packaged files are whatever git tracks; test files are the tracked
  # entries under test/, spec/ or features/.
  spec.files      = `git ls-files`.split("\n")
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables << 'taaze'

  # Runtime dependencies
  spec.add_runtime_dependency 'nokogiri'
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: taaze
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bueze
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -31,7 +31,26 @@ executables:
|
|
31
31
|
extensions: []
|
32
32
|
extra_rdoc_files: []
|
33
33
|
files:
|
34
|
+
- ".gitignore"
|
35
|
+
- ".travis.yml"
|
36
|
+
- Gemfile
|
37
|
+
- LICENSE
|
38
|
+
- README.md
|
39
|
+
- Rakefile
|
34
40
|
- bin/taaze
|
41
|
+
- example.jpg
|
42
|
+
- lib/taaze.rb
|
43
|
+
- lib/taaze/collections.rb
|
44
|
+
- lib/taaze/comments.rb
|
45
|
+
- lib/taaze/tags.rb
|
46
|
+
- lib/taaze/version.rb
|
47
|
+
- spec/collections_spec.rb
|
48
|
+
- spec/comments_spec.rb
|
49
|
+
- spec/fixures/collections.yml
|
50
|
+
- spec/fixures/comments.yml
|
51
|
+
- spec/fixures/tags.yml
|
52
|
+
- spec/tags_spec.rb
|
53
|
+
- taaze.gemspec
|
35
54
|
homepage: https://github.com/BUEZE/taaze
|
36
55
|
licenses:
|
37
56
|
- MIT
|
@@ -56,4 +75,10 @@ rubygems_version: 2.5.0
|
|
56
75
|
signing_key:
|
57
76
|
specification_version: 4
|
58
77
|
summary: Data extractor of Taaze
|
59
|
-
test_files:
|
78
|
+
test_files:
|
79
|
+
- spec/collections_spec.rb
|
80
|
+
- spec/comments_spec.rb
|
81
|
+
- spec/fixures/collections.yml
|
82
|
+
- spec/fixures/comments.yml
|
83
|
+
- spec/fixures/tags.yml
|
84
|
+
- spec/tags_spec.rb
|