taaze 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3d5eb57aef00d5fdf5a258dde8edc152ac8a2aa4
4
- data.tar.gz: 8a9d617755a2dc8d808bcef771ac01e19450025a
3
+ metadata.gz: 503d21509d28f6411ef50ff8b1ffba0811340922
4
+ data.tar.gz: ecdce9af22a15b483e625ac71f986b3a405619c1
5
5
  SHA512:
6
- metadata.gz: e6a308cc4787de2f21ba5f5afaffeae17475d6ed820d8a8cb67138cc6bc25e8a840041915dacd8acb6426cfd168b1fc71797c4a307dccb8d5dff9e90cad17d4e
7
- data.tar.gz: 29ab262a8f2971f55e6f828f6e6cfec3dfb3efaf2af657d666eae0b4c0b86b8565e9cd54fe90310a97953d1c26ba92575ad595a1c1d178ae4eec148b3a11fd81
6
+ metadata.gz: 5b197a82f8aed1e829b999339127b49f142d16c60b284d86bed60052333bae8d9879ec237e0a98f94d1772605ecdbab1cf9abb21adae84f845b62925751e3158
7
+ data.tar.gz: e3845845ca6638c0e512f285a2d4c0d02d5d40e770a0939f05fa52a55e4051acb4fb6a4caae352b4c075e4853cd411036a30694875e05780b2a6e51a777c258d
data/.gitignore ADDED
@@ -0,0 +1,80 @@
1
+ # Created by https://www.gitignore.io/api/ruby,osx,vim
2
+
3
+ ### Ruby ###
4
+ *.gem
5
+ *.rbc
6
+ /.config
7
+ /coverage/
8
+ /InstalledFiles
9
+ /pkg/
10
+ /spec/reports/
11
+ /spec/examples.txt
12
+ /test/tmp/
13
+ /test/version_tmp/
14
+ /tmp/
15
+ Gemfile.lock
16
+
17
+ ## Specific to RubyMotion:
18
+ .dat*
19
+ .repl_history
20
+ build/
21
+
22
+ ## VCR
23
+ /spec/fixures/vcr_cassettes
24
+
25
+ ## Documentation cache and generated files:
26
+ /.yardoc/
27
+ /_yardoc/
28
+ /doc/
29
+ /rdoc/
30
+
31
+ ## Environment normalisation:
32
+ /.bundle/
33
+ /vendor/bundle
34
+ /lib/bundler/man/
35
+
36
+ # for a library or gem, you might want to ignore these files since the code is
37
+ # intended to run in multiple environments; otherwise, check them in:
38
+ # Gemfile.lock
39
+ # .ruby-version
40
+ # .ruby-gemset
41
+
42
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
43
+ .rvmrc
44
+
45
+
46
+ ### OSX ###
47
+ .DS_Store
48
+ .AppleDouble
49
+ .LSOverride
50
+
51
+ # Icon must end with two \r
52
+ Icon
53
+
54
+
55
+ # Thumbnails
56
+ ._*
57
+
58
+ # Files that might appear in the root of a volume
59
+ .DocumentRevisions-V100
60
+ .fseventsd
61
+ .Spotlight-V100
62
+ .TemporaryItems
63
+ .Trashes
64
+ .VolumeIcon.icns
65
+
66
+ # Directories potentially created on remote AFP share
67
+ .AppleDB
68
+ .AppleDesktop
69
+ Network Trash Folder
70
+ Temporary Items
71
+ .apdisk
72
+
73
+
74
+ ### Vim ###
75
+ [._]*.s[a-w][a-z]
76
+ [._]s[a-w][a-z]
77
+ *.un~
78
+ Session.vim
79
+ .netrwhist
80
+ *~
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ rvm:
3
+ - ruby-head
4
+ - ruby-2.1.0
5
+ - jruby-head
6
+ branches:
7
+ only:
8
+ - master
data/Gemfile ADDED
@@ -0,0 +1,11 @@
1
+ source 'https://rubygems.org'
2
+
3
+ group :test do
4
+ gem 'rake'
5
+ end
6
+
7
+ gem 'nokogiri'
8
+ gem 'minitest'
9
+ gem 'json'
10
+ gem 'vcr'
11
+ gem 'webmock'
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 BUEZE
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
data/README.md ADDED
@@ -0,0 +1,56 @@
1
+ # taaze [![Build Status](https://travis-ci.org/BUEZE/taaze.svg?branch=master)](https://travis-ci.org/BUEZE/taaze)
2
+
3
+ ---
4
+ Taaze is a lightweight package written in Ruby. It makes it easy to get comments or collections from the online bookstore [Taaze | 讀冊生活](http://www.taaze.tw/index.html).
5
+
6
+ ### How To Get Started
7
+ Install the Ruby gem [Taaze](https://rubygems.org/gems/taaze)
8
+
9
+ `gem install taaze`
10
+
11
+
12
+ ### Requirements
13
+ - [Nokogiri](http://nokogiri.org) - for parsing HTML
14
+ - [JSON](http://ruby-doc.org/stdlib-2.0.0/libdoc/json/rdoc/JSON.html) - for parsing JSON data
15
+
16
+ `$ bundle install`
17
+
18
+ ### Usage
19
+ This gem may be used as a command line utility or called from code
20
+
21
+ #### How to get ID
22
+ ##### User's comment
23
+ - You can get user list at [冊格子](http://www.taaze.tw/zekea_index.html)
24
+ - On every user's personal page, you can get his or her ID from the URL
25
+ - For example:
26
+ - http://www.taaze.tw/container_zekeaclt_view.html?ci=13313301
27
+ - 'ci=' is User ID
28
+ - Like the user id, you can also get any books' ID from the URL
29
+ - There are 3 arguments you can use
30
+ - --comments : get all comments for the user
31
+ - --collections : get all book collections for the user
32
+ - --tags : get all tags of the book
33
+ - Add the YAML path where you want to store the output
34
+ - --output [file_path]
35
+ - Finally, Taaze will write the result as a YAML-format file
36
+
37
+ ### CLI
38
+
39
+ `taaze --comments [user_id] --output [file_path]` OR
40
+
41
+ `taaze --collections [user_id] --output [file_path]` OR
42
+
43
+ `taaze --tags [book_id] --output [file_path]`
44
+
45
+ ### For Example
46
+
47
+ `taaze --comments 12522728 --output ./comments.yml`
48
+
49
+ `taaze --collections 12522728 --output ./collections.yml`
50
+
51
+ `taaze --tags 11100763435 --output ./tags.yml`
52
+
53
+
54
+ ### Output Sample
55
+
56
+ ![example.jpg](example.jpg)
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require 'rake/testtask'
2
+
3
+ task default: [:spec]
4
+
5
+ desc 'Run specs'
6
+ Rake::TestTask.new(:spec) do |t|
7
+ t.pattern = 'spec/*_spec.rb'
8
+ end
data/example.jpg ADDED
Binary file
data/lib/taaze.rb ADDED
@@ -0,0 +1,3 @@
1
+ require_relative 'taaze/collections.rb'
2
+ require_relative 'taaze/comments.rb'
3
+ require_relative 'taaze/tags.rb'
@@ -0,0 +1,56 @@
1
+ require 'json'
2
+ require 'open-uri'
3
+ require 'date'
4
+
5
+ module Taaze
6
+   # This class takes a user ID as input and
7
+   # returns the user's collections as an array of hashes (title and book_url)
8
+ # For example, with user's url: http://www.taaze.tw/container_zekeaclt_view.html?ci=12522728
9
+ # here's the user's collections of books: http://www.taaze.tw/container_zekeaclt_view.html?ci=12522728&cp=1
10
+   # Sample output (please remove after the tests pass):
11
+ # [
12
+ # {"title"=>"村上收音機", "book_url"=>"http://www.taaze.tw/sing.html?pid=11100635522"}
13
+ # {"title"=>"尋找漩渦貓的方法", "book_url"=>"http://www.taaze.tw/sing.html?pid=11100210251"}
14
+ # and so on ...
15
+ # ]
16
+ class TaazeCollections
17
+ USERS_URL = 'http://www.taaze.tw/container_zekeaclt_view.html?ci='
18
+ API_URL = 'http://www.taaze.tw/beta/zekeacltBooksDataAgent.jsp?custId='
19
+ BOOK_URL = 'http://www.taaze.tw/sing.html?pid='
20
+
21
+ def initialize(user_id)
22
+ parse_html(user_id)
23
+ end
24
+
25
+     # Return an array of hashes, one per book in the user's collections
26
+ def collections
27
+ @collections ||= extract_books
28
+ end
29
+
30
+ private
31
+
32
+     # Fetch the user's collections as JSON from the API (two requests: size, then all items).
33
+ def parse_html(user_id)
34
+ url = API_URL + user_id
35
+ first_doc = JSON.parse(open(url).read)
36
+ totalsize = first_doc['totalsize']
37
+
38
+ url = API_URL + user_id + '&startNum=0&endNum=' + totalsize.to_s
39
+ @doc = JSON.parse(open(url).read)['result1']
40
+ end
41
+
42
+ # Return the books in the format specified in spec.
43
+ def extract_books
44
+ booklist = []
45
+ if @doc.count != 0
46
+ @doc.each do |book_data|
47
+ book = {}
48
+ book['title'] = book_data['titleMain']
49
+ book['book_url'] = BOOK_URL + book_data['prodId']
50
+ booklist << book
51
+ end
52
+ end
53
+ booklist
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,120 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'date'
4
+ require 'json'
5
+ require 'net/http'
6
+
7
+ # Module Taaze
8
+ module Taaze
9
+   # This class takes a user ID as input and
10
+   # returns the user's comments as an array of hashes
11
+ # For example, with user's url: http://www.taaze.tw/container_zekeaclt_view.html?ci=12522728
12
+ # here's the user's comments of books: http://www.taaze.tw/container_zekeaclt_view.html?ci=12522728&cp=2
13
+   # Sample output (please remove after the tests pass):
14
+ class TaazeComments
15
+ USERS_URL = 'http://www.taaze.tw/container_zekeaclt_view.html?ci='
16
+ MAIN_URL = 'http://www.taaze.tw/container_zekeaclt_view.html?'
17
+ API_URL = 'http://www.taaze.tw/beta/zekeaCommentDataAgent.jsp?custId='
18
+ BOOK_URL = 'http://www.taaze.tw/sing.html?pid='
19
+
20
+ def initialize(user_id)
21
+ user_id = numeric?(user_id) ? user_id.to_s : user_id
22
+ parse_html(user_id)
23
+ end
24
+
25
+     # Return an array of hashes, one per comment by the user
26
+ def comments
27
+ @comments_found
28
+ end
29
+
30
+ private
31
+
32
+     # Check if obj looks like a number. FIXME: `== nil?` calls self.nil? (false), so this always returns true
33
+ def numeric?(obj)
34
+ obj.to_s.match(/\A[+-]?\d+?(\.\d+)?\Z/) == nil? ? false : true
35
+ end
36
+
37
+     # Split the query string of a URL into its '&'-separated parts.
38
+ def split_url(url)
39
+ str = split_get_last(split_get_last(url, '/'), '?')
40
+ str = split_get_last(str, '&', -1)
41
+ str
42
+ end
43
+
44
+ # split string and get last
45
+     # meaning of index:
46
+ # -1 -> return all array
47
+ # 0 -> return last obj in array
48
+ # >=1 -> return obj on array[index]
49
+ def split_get_last(str_to_split, character, index = 0)
50
+ arrtmp = str_to_split.split(character)
51
+ if index == -1
52
+ arrtmp
53
+ else
54
+ index = index == 0 ? arrtmp.length - 1 : index
55
+ strtmp = arrtmp[index]
56
+ strtmp
57
+ end
58
+ end
59
+
60
+     # Fetch all of the user's comments as JSON from the API and extract them.
61
+ def parse_html(user_id)
62
+ url = API_URL + user_id.to_s + '&startNum=0&endNum=0'
63
+       # first request: get the total number of comments for the user
64
+ first_doc = JSON.parse(url_get_html(url))
65
+ total_num = first_doc['totalsize']
66
+
67
+ url = API_URL + user_id.to_s + '&startNum=0&endNum=' + total_num
68
+       # main_doc is the JSON data for all of the user's comments
69
+ main_doc = JSON.parse(url_get_html(url))
70
+ extract_comments(main_doc['result1'], user_id)
71
+ end
72
+
73
+ # Send url to get response
74
+ def url_get_html(url_str)
75
+ url = URI.parse(URI.encode(url_str)) # first get total size
76
+ req = Net::HTTP::Get.new(url.to_s)
77
+ res = Net::HTTP.start(url.host, url.port) { |http| http.request(req) }
78
+ res.body
79
+ end
80
+
81
+ # Return the comments in the format specified in spec.
82
+ def extract_comments(content, user_id)
83
+ # Json format~
84
+ # "content":"勇氣就是熱誠,來自於我們對自己工作的自信心;.....",
85
+ # "title":"",
86
+ # "status":"C",
87
+ # "stars":"5",
88
+ # "prodId":"11100597685",
89
+ # "titleMain":"行銷之神原一平全集(精裝版)",
90
+ # "orgProdId":"11100597685",
91
+ # "pkNo":"1000243977",
92
+ # "mdf_time":"2015/10/15",
93
+ # "crt_time":"2015/10/15"
94
+ #
95
+ # orgProdId -> bookID -> 11100597685
96
+ # title -> book title
97
+ # content -> comment
98
+ # comment_url
99
+ # pkNo -> comment ID ->13313301
100
+ #
101
+ # http://www.taaze.tw/container_zekeaclt_view.html?co=1000238964&ci=12522728&cp=3
102
+ # co->comment ci->user
103
+ data_arr = []
104
+ if content
105
+ content.each do |cmtItem|
106
+ data_hash_sub = Hash.new {}
107
+ data_hash_sub['title'] = cmtItem['titleMain']
108
+ data_hash_sub['comment'] = cmtItem['content']
109
+ data_hash_sub['book_url'] = BOOK_URL + cmtItem['orgProdId']
110
+ url = MAIN_URL + 'co=' + cmtItem['pkNo'] + '&ci=' + user_id + '&cp=3'
111
+ data_hash_sub['comment_url'] = url
112
+ data_arr.push(data_hash_sub)
113
+ end
114
+ else
115
+ data_arr = []
116
+ end
117
+ @comments_found ||= data_arr
118
+ end
119
+ end
120
+ end
data/lib/taaze/tags.rb ADDED
@@ -0,0 +1,38 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
+ module Taaze
5
+   # This class takes a product ID as input and
6
+   # returns a list of tags
7
+ # Sample input with product id : 11100763252
8
+ # Sample output ['轉型正義', '白色恐怖', '社會科學', '臺灣民主運動']
9
+
10
+ class TaazeBooktags
11
+ BOOKS_URL = 'http://www.taaze.tw/sing.html?pid='
12
+
13
+ def initialize(book_id)
14
+ parse_html(book_id)
15
+ end
16
+
17
+ # Return a list of book's tags
18
+ def tags
19
+ @tags ||= extract_tags
20
+ end
21
+
22
+ private
23
+
24
+ # parse the html
25
+ def parse_html(book_id)
26
+ url = BOOKS_URL + book_id
27
+ @document = Nokogiri::HTML(open(url))
28
+ end
29
+
30
+ def extract_tags
31
+ tags = []
32
+ @document.xpath('//a[@class="tag"]').each do |t|
33
+ tags << t.text
34
+ end
35
+ tags
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,5 @@
1
+ # Versioning
2
+ module Taaze
3
+ VERSION = '0.0.5'
4
+ DATE = '2015-10-25'
5
+ end
@@ -0,0 +1,32 @@
1
+ require 'yaml'
2
+ require 'minitest/autorun'
3
+ require 'vcr'
4
+ require 'webmock/minitest'
5
+ require_relative '../lib/taaze.rb'
6
+
7
+ USER_ID = %w(12522728 13193872)
8
+ collections_from_file = YAML.load(File.read('./spec/fixures/collections.yml'))
9
+
10
+ VCR.configure do |config|
11
+ config.cassette_library_dir = 'spec/fixures/vcr_cassettes'
12
+ config.hook_into :webmock
13
+ end
14
+
15
+ VCR.use_cassette('collections') do
16
+ USER_ID.each do |user_id|
17
+ collections_found = Taaze::TaazeCollections.new(user_id).collections
18
+
19
+ describe "Get all the collections with specific user's id" do
20
+ it 'has the right number of collections' do
21
+ collections_found.size.must_equal collections_from_file[user_id].size
22
+ end
23
+
24
+ collections_found.map do |book|
25
+ it 'finds book in the test collection' do
26
+ exist = collections_from_file[user_id].include? book
27
+ [book, exist].must_equal [book, true]
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,32 @@
1
+ require 'yaml'
2
+ require 'minitest/autorun'
3
+ require 'vcr'
4
+ require 'webmock/minitest'
5
+ require_relative '../lib/taaze.rb'
6
+
7
+ USER_ID = %w(12522728 13193872)
8
+ comments_from_file = YAML.load(File.read('./spec/fixures/comments.yml'))
9
+
10
+ VCR.configure do |config|
11
+ config.cassette_library_dir = 'spec/fixures/vcr_cassettes'
12
+ config.hook_into :webmock
13
+ end
14
+
15
+ VCR.use_cassette('comments') do
16
+ USER_ID.each do |user_id|
17
+ comments_found = Taaze::TaazeComments.new(user_id).comments
18
+
19
+ describe "Get all the comments with specific user's id" do
20
+ it 'has the right number of comments' do
21
+ comments_found.size.must_equal comments_from_file[user_id].size
22
+ end
23
+
24
+ comments_found.map do |comment|
25
+ it 'finds comments in the test comments' do
26
+ exist = comments_from_file[user_id].include? comment
27
+ [comment, exist].must_equal [comment, true]
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,95 @@
1
+ ---
2
+ '12522728':
3
+ - title: '村上收音機'
4
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100635522'
5
+ - title: '尋找漩渦貓的方法'
6
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100210251'
7
+ - title: '張愛玲的世界'
8
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100030671'
9
+ - title: '拜訪糖果阿姨'
10
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100653796'
11
+ - title: 'A夢'
12
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100732992'
13
+ - title: '河流'
14
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100683568'
15
+ - title: '紅樓夢校注彩畫本(1-3冊合售)'
16
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100161589'
17
+ - title: '傾城之戀【張愛玲典藏新版】'
18
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100228548'
19
+ - title: '金烏'
20
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100677389'
21
+ - title: '臺灣成長小說選(增訂版)'
22
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100649825'
23
+ - title: '靈魂的領地:國民散文讀本 '
24
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100653909'
25
+ - title: '懸崖上的花園:太平洋戰爭時期上海文學場域(1942-1945)'
26
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100672028'
27
+ - title: '九重葛與美少年'
28
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100657709'
29
+ - title: '春雪:豐饒之海四部曲(1)'
30
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100034168'
31
+ - title: '魯迅小說集'
32
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100067679'
33
+ - title: '波赫士(1-4冊合售合售)'
34
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100125593'
35
+ - title: '奇萊前書'
36
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100067730'
37
+ - title: '半生緣【張愛玲典藏新版】'
38
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100192438'
39
+ - title: '瓊美卡隨想錄(散文)'
40
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100616297'
41
+ - title: '犄角'
42
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100617464'
43
+ - title: '疾病的隱喻 '
44
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100625913'
45
+ - title: '古都'
46
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100268249'
47
+ - title: '觀看的方式'
48
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100238376'
49
+ - title: '臺灣成長小說選'
50
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100035125'
51
+ - title: '晨讀10分鐘:青春無敵早點詩:中學生新詩選'
52
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100631660'
53
+ - title: '屏息的文明'
54
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100034203'
55
+ - title: '雲和'
56
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100034334'
57
+ - title: '海風野火花'
58
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100249859'
59
+ - title: '你的聲音充滿時間'
60
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100276696'
61
+ - title: '離與苦:戰爭的延續'
62
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100254818'
63
+ - title: '少女維特'
64
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100241234'
65
+ - title: '搜索者'
66
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100067568'
67
+ - title: '浮生草'
68
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100591888'
69
+ - title: '溫莎墓園日記(小說)'
70
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100617237'
71
+ - title: '漫長的告別'
72
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100210749'
73
+ - title: '雙月記'
74
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100236244'
75
+ - title: '愛情的盡頭'
76
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100210189'
77
+ - title: '我們在此相遇'
78
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100154779'
79
+ - title: '月球姓氏(聯合文學經典版)'
80
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100220368'
81
+ - title: '小團圓'
82
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100192439'
83
+ - title: '瑪德蓮'
84
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100596747'
85
+ '13193872':
86
+ - title: '大賣場裡的人類學家:用人文科學搞懂消費者,解決最棘手的商業問題'
87
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100707843'
88
+ - title: '看不見的城市'
89
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100175133'
90
+ - title: '演算法統治世界'
91
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100710575'
92
+ - title: '從0到1:打開世界運作的未知祕密,在意想不到之處發現價值'
93
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100720260'
94
+ - title: '為社會而設計'
95
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100713612'
@@ -0,0 +1,47 @@
1
+ ---
2
+ '12522728':
3
+ - title: '張愛玲的世界'
4
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100030671'
5
+ comment: '書中所收,尤其是王禎和〈張愛玲在台灣〉一文,珍貴且真摯。'
6
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238964&ci=12522728&cp=3'
7
+ - title: '小團圓'
8
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100192439'
9
+ comment: '祖奶奶晚期風格,誠實、枯寂又暴烈。'
10
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238963&ci=12522728&cp=3'
11
+ - title: '村上收音機(3):喜歡吃沙拉的獅子'
12
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100644311'
13
+ comment: '第一集到第三集都很好看,雋永且富於生趣的小品,有如爽口的下酒菜!'
14
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238962&ci=12522728&cp=3'
15
+ - title: '村上收音機(2):大蕪菁、難挑的酪梨'
16
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100635526'
17
+ comment: '第一集到第三集都很好看,雋永且富於生趣的小品,有如爽口的下酒菜!'
18
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238961&ci=12522728&cp=3'
19
+ - title: '村上收音機'
20
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100635522'
21
+ comment: '第一集到第三集都很好看,雋永且富於生趣的小品,有如爽口的下酒菜!'
22
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238960&ci=12522728&cp=3'
23
+ - title: '尋找漩渦貓的方法'
24
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100210251'
25
+ comment: '貓控大叔的異國生活誌,充滿了自以為是的趣味感,十分適合自認為有點怪怪的人(咦)。'
26
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000238959&ci=12522728&cp=3'
27
+ - title: '拜訪糖果阿姨'
28
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100653796'
29
+ comment: '那些終於灰心或終於原諒的情感角落,發出被記憶篩過的,淡而暖的光。'
30
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000224332&ci=12522728&cp=3'
31
+ - title: 'A夢'
32
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100732992'
33
+ comment: '鮮活聯繫新詩小傳統與當代文化,夭矯複雜如深情的孫悟空。'
34
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000224330&ci=12522728&cp=3'
35
+ - title: '河流'
36
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100683568'
37
+ comment: '城市的女漫遊者,寫邊緣,寫冷淡,寫不被看見或被拋擲的。'
38
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000224331&ci=12522728&cp=3'
39
+ - title: '紅樓夢校注彩畫本(1-3冊合售)'
40
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100161589'
41
+ comment: '此書要什麼有什麼,纏綿悱惻有之,狗皮倒灶有之,詩情畫意有之,醜態怪狀有之,不可能有厭煩的時候。'
42
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000213998&ci=12522728&cp=3'
43
+ - title: '傾城之戀【張愛玲典藏新版】'
44
+ book_url: 'http://www.taaze.tw/sing.html?pid=11100228548'
45
+ comment: "張愛玲《傾城之戀》(應是舊版《第一爐香》,現已絕版)。最感官化的小說家,最斑斕的視覺,蒙太奇的敘事,張愛玲的「色彩狂」令人在閱讀時總會想像那電影般的畫面。\n"
46
+ comment_url: 'http://www.taaze.tw/container_zekeaclt_view.html?co=1000211000&ci=12522728&cp=3'
47
+ '13193872': ''
@@ -0,0 +1,16 @@
1
+ ---
2
+ '11100763435':
3
+ - '文化研究'
4
+ - '社會科學'
5
+ - '台灣史地'
6
+ - '中文書'
7
+ - '人文史地'
8
+ '11100714084':
9
+ - '建議國家高層讀一下'
10
+ '11303096752': ''
11
+ '11100745134':
12
+ - '旅遊'
13
+ - '單車'
14
+ - '台灣歷史'
15
+ - '打狗'
16
+ - '高雄'
data/spec/tags_spec.rb ADDED
@@ -0,0 +1,31 @@
1
+ require 'yaml'
2
+ require 'minitest/autorun'
3
+ require 'vcr'
4
+ require 'webmock/minitest'
5
+ require_relative '../lib/taaze.rb'
6
+
7
+ BOOK_ID = %w(11100763435 11100745134 11100714084 11303096752)
8
+ tags_from_file = YAML.load(File.read('./spec/fixures/tags.yml'))
9
+
10
+ VCR.configure do |config|
11
+ config.cassette_library_dir = 'spec/fixures/vcr_cassettes'
12
+ config.hook_into :webmock
13
+ end
14
+
15
+ VCR.use_cassette('tags') do
16
+ BOOK_ID.each do |book_id|
17
+ tags_found = Taaze::TaazeBooktags.new(book_id).tags
18
+ describe "Get all the tags with specific book's id" do
19
+ it 'has the right number of tags' do
20
+ tags_found.size.must_equal tags_from_file[book_id].size
21
+ end
22
+
23
+ tags_found.map do |tag|
24
+ it 'finds tags in the test tags' do
25
+ exist = tags_from_file[book_id].include? tag
26
+ [tag, exist].must_equal [tag, true]
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
data/taaze.gemspec ADDED
@@ -0,0 +1,18 @@
1
+ $LOAD_PATH.push File.expand_path('../lib', __FILE__)
2
+ require 'taaze/version'
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = 'taaze'
6
+ s.version = Taaze::VERSION
7
+ s.executables << 'taaze'
8
+ s.add_runtime_dependency 'nokogiri'
9
+ s.date = Taaze::DATE
10
+ s.summary = 'Data extractor of Taaze'
11
+ s.description = 'Get the book comments or collections of specific user.'
12
+ s.authors = ['Bueze']
13
+ s.email = ''
14
+ s.files = `git ls-files`.split("\n")
15
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
16
+ s.homepage = 'https://github.com/BUEZE/taaze'
17
+ s.license = 'MIT'
18
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: taaze
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bueze
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-24 00:00:00.000000000 Z
11
+ date: 2015-10-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -31,7 +31,26 @@ executables:
31
31
  extensions: []
32
32
  extra_rdoc_files: []
33
33
  files:
34
+ - ".gitignore"
35
+ - ".travis.yml"
36
+ - Gemfile
37
+ - LICENSE
38
+ - README.md
39
+ - Rakefile
34
40
  - bin/taaze
41
+ - example.jpg
42
+ - lib/taaze.rb
43
+ - lib/taaze/collections.rb
44
+ - lib/taaze/comments.rb
45
+ - lib/taaze/tags.rb
46
+ - lib/taaze/version.rb
47
+ - spec/collections_spec.rb
48
+ - spec/comments_spec.rb
49
+ - spec/fixures/collections.yml
50
+ - spec/fixures/comments.yml
51
+ - spec/fixures/tags.yml
52
+ - spec/tags_spec.rb
53
+ - taaze.gemspec
35
54
  homepage: https://github.com/BUEZE/taaze
36
55
  licenses:
37
56
  - MIT
@@ -56,4 +75,10 @@ rubygems_version: 2.5.0
56
75
  signing_key:
57
76
  specification_version: 4
58
77
  summary: Data extractor of Taaze
59
- test_files: []
78
+ test_files:
79
+ - spec/collections_spec.rb
80
+ - spec/comments_spec.rb
81
+ - spec/fixures/collections.yml
82
+ - spec/fixures/comments.yml
83
+ - spec/fixures/tags.yml
84
+ - spec/tags_spec.rb