crowd_funding_parser 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,90 +1,90 @@
1
- require 'open-uri'
2
-
3
1
  module CrowdFundingParser
4
2
  module Parser
5
3
  class Flyingv < General
6
- def initialize(*cat)
7
- categories = cat.empty? ? ["designgoods", "media", "stageplay", "entertainment", "publish", "society", "technology", "food", "travel"] : cat
8
- @url = "https://www.flyingv.cc"
9
- @targets = []
4
+ MethodBuilder.set_methods do
5
+ insert_parser "Flyingv"
10
6
 
11
- categories.each do |category|
12
- category_url = @url + "/category/#{category}"
13
- @targets << open(category_url)
7
+ set_variable do
8
+ @platform_url = "https://www.flyingv.cc"
9
+ @time_regex = /(\d{4}\/\d{2}\/\d{2}).+(\d{4}\/\d{2}\/\d{2})/
14
10
  end
15
11
 
16
- @item_css_class = ".portfolio-item"
17
- @status_css_class = ".unit-time"
18
- end
12
+ set_method :get_title do |doc|
13
+ get_string(doc.css(".page-title-wrapper").css(".pagesTitle"))
14
+ end
19
15
 
20
- # for project's info
21
- def get_id(rel_url)
22
- rel_url.split("/").last
23
- end
16
+ set_method :get_category do |doc|
17
+ doc.css(".page-title-wrapper").css(".pageDes").first.css("a").first.text
18
+ end
24
19
 
25
- def get_title(doc)
26
- get_string(doc.css(".page-title-wrapper").css(".pagesTitle"))
27
- end
20
+ set_method :get_creator_name do |doc|
21
+ doc.css(".page-title-wrapper").css(".pageDes")[1].css("a").first.text.strip
22
+ end
28
23
 
29
- def get_category(doc)
30
- doc.css(".page-title-wrapper").css(".pageDes").first.css("a").first.text
31
- end
24
+ set_method :get_creator_id do |doc|
25
+ doc.css(".page-title-wrapper").css(".pageDes")[1].css("a").first["href"].split("/").last
26
+ end
32
27
 
33
- def get_creator_name(doc)
34
- doc.css(".page-title-wrapper").css(".pageDes")[1].css("a").first.text.strip
35
- end
28
+ set_method :get_creator_link do |doc|
29
+ @platform_url + doc.css(".profilemeta .imp a").first["href"]
30
+ end
36
31
 
37
- def get_creator_id(doc)
38
- doc.css(".page-title-wrapper").css(".pageDes")[1].css("a").first["href"].split("/").last
39
- end
32
+ set_method :get_summary do |doc|
33
+ doc.css(".project_content").first.text.to_s[0..500].strip
34
+ end
40
35
 
41
- def get_creator_link(doc)
42
- @url + doc.css(".profilemeta .imp a").first["href"]
43
- end
36
+ set_method :get_start_date do |doc|
37
+ text = get_string(doc.css(".col-xs-4.sidebarprj")).gsub(/\n/, "")
38
+ @time_regex.match(text)[1]
39
+ end
44
40
 
45
- def get_summary(doc)
46
- doc.css(".project_content").first.text.to_s[0..500].strip
47
- end
41
+ set_method :get_end_date do |doc|
42
+ text = get_string(doc.css(".col-xs-4.sidebarprj")).gsub(/\n/, "")
43
+ @time_regex.match(text)[2]
44
+ end
48
45
 
49
- # for tracking
46
+ set_method :get_region do |doc|
47
+ "Taiwan"
48
+ end
50
49
 
51
- def get_money_goal(doc)
52
- money_string(get_string(doc.css(".countdes .dt .white")))
53
- end
50
+ set_method :get_money_goal do |doc|
51
+ money_string(get_string(doc.css(".countdes .dt .white")))
52
+ end
54
53
 
55
- def get_money_pledged(doc)
56
- money_string(get_string(doc.css(".countdes .ut .rtt h3")))
57
- end
54
+ set_method :get_money_pledged do |doc|
55
+ money_string(get_string(doc.css(".countdes .ut .rtt h3")))
56
+ end
58
57
 
59
- def get_backer_count(doc)
60
- get_string(doc.css(".countdes .dt .pull-right")).sub("人贊助", "")
61
- end
58
+ set_method :get_backer_count do |doc|
59
+ get_string(doc.css(".countdes .dt .pull-right")).sub("人贊助", "")
60
+ end
62
61
 
63
- def get_last_time(doc)
64
- get_string(doc.css(".countdes .dt div:nth-child(2)")).sub("剩餘", "")
65
- end
62
+ set_method :get_left_time do |doc|
63
+ get_string(doc.css(".countdes .dt div:nth-child(2)")).sub("剩餘", "")
64
+ end
66
65
 
67
- def get_status(last_time)
68
- if last_time.match("已結束")
69
- "finished"
70
- elsif last_time.match("開始")
71
- "preparing"
72
- else
73
- "online"
66
+ set_method :get_status do |left_time|
67
+ if left_time.match("已結束")
68
+ "finished"
69
+ elsif left_time.match("開始")
70
+ "preparing"
71
+ else
72
+ "online"
73
+ end
74
74
  end
75
- end
76
75
 
77
- def get_fb_count(doc)
78
- get_string(doc.css("#fbBtn .sharenumber"))
79
- end
76
+ set_method :get_fb_count do |doc|
77
+ get_string(doc.css("#fbBtn .sharenumber"))
78
+ end
80
79
 
81
- def get_following_count(doc)
82
- get_string(doc.css(".sidebarprj h5")).sub("人追踨", "").sub("追蹤", "").strip
83
- end
80
+ set_method :get_following_count do |doc|
81
+ get_string(doc.css(".sidebarprj h5")).sub("人追踨", "").sub("追蹤", "").strip
82
+ end
84
83
 
85
- def get_backer_list(project_url)
86
- []
84
+ set_method :get_currency_string do |result|
85
+ "twd"
86
+ end
87
87
  end
88
88
  end
89
89
  end
90
- end
90
+ end
@@ -0,0 +1,99 @@
1
+ module CrowdFundingParser
2
+ module Parser
3
+ class Hereo < General
4
# Sets up the Hereo parser state: the site root URL and the two selector
# strings kept in @item_css_class and @status_css_class.
def initialize
  {
    :@platform_url     => "http://www.hereo.cc/",
    :@item_css_class   => ".project-list ul li",
    :@status_css_class => ".projectImg .info .inner .detail span:nth-child(1)"
  }.each { |ivar, value| instance_variable_set(ivar, value) }
end
9
+
10
# Fetches the Hereo project listing page.
#
# @return [Array] a one-element array holding whatever HTTParty.get returns
#   for the listing URL
def get_lists
  # @platform_url may already end in "/"; strip it before appending the
  # path so the request does not go to ".cc//project-list.php".
  listing_url = @platform_url.chomp("/") + "/project-list.php"
  [HTTParty.get(listing_url)]
end
13
+
14
# Pulls the project id out of a Hereo project URL.
#
# @param project_url [String] URL expected to contain a "pid=" marker
# @return [String, nil] the text following the last "pid="; when the marker
#   is absent the whole string comes back, and an empty string yields nil
def get_id(project_url)
  pieces = project_url.split("pid=")
  pieces[-1]
end
17
+
18
# Declares the per-project field extractors for Hereo through the
# MethodBuilder DSL (defined outside this file). Blocks taking `doc` call
# `.css` on it, so `doc` is presumably a parsed HTML document -- confirm
# against MethodBuilder.
MethodBuilder.set_methods do
  insert_parser "Hereo"

  set_variable do
    @platform_url = "http://www.hereo.cc/"
  end

  set_method :get_title do |doc|
    get_string(doc.css(".container .text h3"))
  end

  set_method :get_category do |doc|
    get_string(doc.css(".contentMain .projectTag"))
  end

  set_method :get_creator_name do |doc|
    get_string(doc.css(".user-info .user .name h4 a"))
  end

  # The creator id is the number following "mid=" in the profile link.
  set_method :get_creator_id do |doc|
    doc.css(".user-info .user .name h4 a")[0]["href"].match(/mid=(\d+)/)[1]
  end

  set_method :get_creator_link do |doc|
    @platform_url + doc.css(".user-info .user .name h4 a")[0]["href"]
  end

  # Summary is the first text container with every whitespace char removed.
  set_method :get_summary do |doc|
    doc.css(".container div.text").first.text.gsub(/\s/, "")
  end

  # No start date is extracted for this platform; the block yields nil.
  set_method :get_start_date do |doc|
  end

  # First YYYY/MM/DD date found in the detail paragraph ("" when none).
  set_method :get_end_date do |doc|
    doc.css(".projectInfo .detail .inner p").text.match(/\d{4}\/\d{2}\/\d{2}/).to_s
  end

  set_method :get_region do |doc|
    "Taiwan"
  end

  set_method :get_money_pledged do |doc|
    money_string(doc.css(".projectInfo .funded .inner .number strong").text.match(/[0-9,]+/).to_s)
  end

  set_method :get_money_goal do |doc|
    money_string(get_string(doc.css(".sidebar h3.num")))
  end

  set_method :get_backer_count do |doc|
    doc.css(".projectInfo .table .numberOfPeople .inner strong").text
  end

  # Returns "<number><unit>" (unit is 天 or 小時) taken from the countdown
  # cell. When the cell holds no such countdown the whitespace-stripped text
  # is returned as-is, so get_status can still inspect it instead of this
  # block raising NoMethodError on a nil match.
  set_method :get_left_time do |doc|
    raw_string = doc.css(".projectInfo .table .time .inner").text.gsub(/\s/, "")
    match_data = raw_string.match(/(\d+).*(天|小時)/)
    match_data ? match_data[1] + match_data[2] : raw_string
  end

  # Maps the left-time text to "online" or "finished"; "0" as the first
  # number in the text also counts as finished.
  set_method :get_status do |left_time|
    if left_time.match("集資中")
      "online"
    elsif left_time.match("結束") || left_time.match("成功") || left_time.match(/\d+/).to_s == "0"
      "finished"
    else
      "online"
    end
  end

  # The block must receive `doc`; without the parameter the body referenced
  # an undefined name.
  set_method :get_following_count do |doc|
    doc.css("strong#track-count").text
  end

  set_method :get_currency_string do |result|
    "twd"
  end
end
96
+ end
97
+ end
98
+ end
99
+
@@ -0,0 +1,158 @@
1
require "iconv"
require "json"
require "time"
require "uri"
3
+
4
+ module CrowdFundingParser
5
+ module Parser
6
+ class Kickstarter < General
7
# Sets up the Kickstarter parser state: site root URL, the list of category
# ids that get_project_links iterates over, and the parse mode symbol.
def initialize
  {
    :@platform_url => "https://www.kickstarter.com",
    :@category_ids => [1, 3, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 26],
    :@parse_method => :doc
  }.each { |ivar, value| instance_variable_set(ivar, value) }
end
12
+
13
# Collects the distinct categories referenced by the projects returned from
# the search API for the given status.
#
# @param status [String] "online" or "finished" (see get_status_code)
# @return [Array<Hash>] unique hashes with :id, :name and :parent_id
def get_all_categories(status = "online")
  status_code = get_status_code(status)
  # get_category_project_jsons returns one array per fetched page; flatten
  # to individual project hashes and drop nils. The result has to be kept:
  # the previous `jsons.flatten.compact!` threw the flattened copy away.
  projects = get_category_project_jsons(status_code).flatten.compact

  # Plain hash building needs no thread pool, so a simple map replaces the
  # Parallel.each that appended to a shared array.
  projects.map do |project|
    category = project["category"]
    { id: category["id"], name: category["name"], parent_id: category["parent_id"] }
  end.uniq
end
24
+
25
# Lists the web URLs of projects in the requested status across every id in
# @category_ids.
#
# A project is kept when its state equals the requested status code and it
# is either live or has pledged money (non-live projects with nothing
# pledged are dropped).
#
# @param status [String] "online" or "finished" (see get_status_code)
# @return [Array<String>] project URLs; filtered-out projects no longer
#   leave nil placeholders in the result
def get_project_links(status = "online")
  status_code = get_status_code(status)

  projects = @category_ids.map do |category_id|
    get_category_project_jsons(status_code, category_id)
  end.flatten.compact

  wanted = projects.select do |project|
    state = project["state"]
    state == status_code && (state == "live" || project["pledged"].to_i != 0)
  end

  # Only hash lookups happen here, so no worker pool is needed.
  wanted.map { |project| project["urls"]["web"]["project"] }
end
40
+
41
# Fetches up to 200 result pages from the project search API for one
# category and collects each page's "projects" value.
#
# @param status_code [String] API state filter, e.g. "live"
# @param category_id [Integer] API category filter
# @return [Array] one entry per successfully fetched page (order follows
#   completion, not page number, when pages are fetched concurrently)
def get_category_project_jsons(status_code = "live", category_id = 0)
  jsons = []

  # The misspelled `in_precesses` key was dropped; it is not a parallel-gem
  # option name, so `in_threads` was presumably the only setting in effect.
  Parallel.each(1..200, in_threads: 5) do |page|
    begin
      api_url = get_projects_page_api(page, status_code, category_id)
      jsons << get_json_through_url(api_url)["projects"]
    rescue StandardError
      # A failed page is taken as the end of the listing. Raising
      # Parallel::Break is what halts further work; the previous bare
      # `Parallel::Stop` expression did nothing, so all 200 pages were tried.
      raise Parallel::Break
    end
  end
  jsons
end
55
+
56
+ private
57
+
58
# Turns a project web URL into its JSON endpoint: everything from the first
# "?" onward is dropped and ".json" is appended.
#
# @param project_url [String]
# @return [String]
def get_project_page_api(project_url)
  without_query = project_url.split("?")[0]
  without_query + ".json"
end
61
+
62
# Builds the project search API URL for one result page.
#
# @param page [Integer] page number
# @param status_code [String] value for the `state` query parameter
# @param category_id [Integer] value for the `category_id` query parameter
# @return [String]
def get_projects_page_api(page = 1, status_code = "live", category_id = 0)
  format(
    "https://www.kickstarter.com/projects/search.json?page=%s&state=%s&category_id=%s",
    page, status_code, category_id
  )
end
65
+
66
# Builds the project search API URL for a free-text term.
#
# @param name [String] search term; it is form-encoded so spaces, "&", "#"
#   and non-ASCII characters cannot break or truncate the query string
# @return [String]
def get_project_search_doc_api(name)
  "https://www.kickstarter.com/projects/search.json?term=#{URI.encode_www_form_component(name)}"
end
69
+
70
# Translates this library's status name into the Kickstarter API state.
# "finished" maps to "successful"; every other value, "online" included,
# maps to "live".
#
# @param status [String]
# @return [String]
def get_status_code(status)
  case status
  when "finished" then "successful"
  else "live"
  end
end
80
+
81
# Declares the per-project field extractors for Kickstarter through the
# MethodBuilder DSL (defined outside this file). Blocks taking `doc` call
# `.css` on it, so `doc` is presumably a parsed HTML document -- confirm
# against MethodBuilder.
MethodBuilder.set_methods do
  insert_parser "Kickstarter"

  set_variable do
    @platform_url = "https://www.kickstarter.com"
  end

  set_method :get_title do |doc|
    get_string(doc.css(".NS_projects__header h2 .green-dark"))
  end

  set_method :get_category do |doc|
    get_string(doc.css(".container-flex .h5 a.grey-dark:nth-child(2) b"))
  end

  set_method :get_creator_name do |doc|
    get_string(doc.css(".NS_projects__creator .col-8>h5 a.remote_modal_dialog"))
  end

  # The creator id is the third-from-last segment of the creator link path.
  set_method :get_creator_id do |doc|
    creator_path = doc.css(".NS_projects__creator .col-8>h5 a.remote_modal_dialog").first["href"]
    creator_path.split("/")[-3]
  end

  set_method :get_creator_link do |doc|
    @platform_url + doc.css(".NS_projects__creator .col-8>h5 a.remote_modal_dialog").first["href"]
  end

  set_method :get_summary do |doc|
    get_string(doc.css(".container-flex .col-8 .mobile-hide p.h3.mb3"))
  end

  # NOTE(review): `try` is not core Ruby; presumably supplied by a library
  # loaded elsewhere (e.g. ActiveSupport) -- confirm.
  set_method :get_end_date do |doc|
    doc.css(".NS_projects__deadline_copy p.grey-dark time[datetime]").try(:first).try(:[], "datetime")
  end

  set_method :get_region do |doc|
    get_string(doc.css(".container-flex .h5 a.grey-dark:nth-child(1) b"))
  end

  set_method :get_money_pledged do |doc|
    doc.css("div[data-pledged]").first["data-pledged"]
  end

  # Reads data-goal from the first element that carries data-pledged.
  set_method :get_money_goal do |doc|
    doc.css("div[data-pledged]").first["data-goal"]
  end

  set_method :get_backer_count do |doc|
    doc.css("div[data-backers-count]").first["data-backers-count"]
  end

  # Returns the remaining time as "<n>天" (days), "<n>小時" (hours, for the
  # final day) or "已結束" once the end time has passed. A missing end time
  # falls back to the current time and therefore reads as ended.
  set_method :get_left_time do |doc|
    end_date = doc.css("div[data-end_time]").try(:first).try(:[], "data-end_time") || Time.now.to_s
    seconds_left = Time.parse(end_date) - Time.now
    if seconds_left <= 0
      "已結束"
    elsif seconds_left < 86400
      # Whole-day division used to floor this case to 0 and report a project
      # with hours still to run as ended.
      (seconds_left / 3600).ceil.to_s + "小時"
    else
      (seconds_left.to_i / 86400).to_s + "天"
    end
  end

  set_method :get_status do |left_time|
    if left_time == "已結束"
      "finished"
    else
      "online"
    end
  end

  set_method :get_currency_string do |doc|
    doc.css("data[data-currency]")[0]["data-currency"]
  end
end
156
+ end
157
+ end
158
+ end
@@ -0,0 +1,150 @@
1
+ require "json"
2
+ require "iconv"
3
+
4
+ module CrowdFundingParser
5
+ module Parser
6
+ class Taobao < General
7
# Sets up the Taobao parser state: JSON parse mode and the project detail
# URL prefix that project ids are appended to.
def initialize
  {
    :@parse_method => :json,
    :@url          => "http://hi.taobao.com/market/hi/detail2014.php?id="
  }.each { |ivar, value| instance_variable_set(ivar, value) }
end
11
+
12
# Lists the detail-page URLs of every project in the requested status.
#
# @param status [String] "online", "preparing" or "finished"
#   (see get_status_code)
# @return [Array<String>] @url followed by each project's id, in the order
#   get_total_jsons returns the projects
def get_project_links(status = "online")
  status_code = get_status_code(status)
  # Building a string per project needs no worker pool; map also keeps the
  # result order stable, unlike appending to a shared array from threads.
  get_total_jsons(status_code).map { |project| @url + project["id"].to_s }
end
24
+
25
# Translates this library's status name into the numeric status used in the
# Taobao list API: "preparing" is 3, "finished" is 2, and anything else
# ("online" included) is 1.
#
# @param status [String]
# @return [Integer]
def get_status_code(status)
  case status
  when "preparing" then 3
  when "finished"  then 2
  else 1
  end
end
37
+
38
# Downloads every list page for the given status and concatenates the
# project entries found under each page's "data" key.
#
# @param status [Integer] numeric API status (see get_status_code)
# @return [Array] all project entries across pages, in page order
def get_total_jsons(status = 1)
  get_total_json_apis(status).flat_map do |url|
    # A page without "data" contributes nothing instead of raising on
    # Array + nil.
    get_json_through_url(url)["data"] || []
  end
end
48
+
49
# Produces the list-API URL of every page, from page 1 up to the page count
# reported by get_total_page.
#
# @param status [Integer] numeric API status
# @return [Array<String>] one URL per page; empty when the count is 0
def get_total_json_apis(status = 1)
  last_page = get_total_page(status).to_i
  (1..last_page).map { |page| get_projects_page_api(page, status) }
end
57
+
58
# Asks the list API how many pages exist for the given status.
#
# @param status [Integer] numeric API status
# @return [Object] the "pageTotal" value of the first list page, as decoded
#   from JSON
def get_total_page(status = 1)
  # Reuse the page URL builder rather than repeating the same literal here.
  first_page = get_json_through_url(get_projects_page_api(1, status))
  first_page["pageTotal"]
end
63
+
64
+ private
65
+
66
# Builds the detail-API URL for a single project.
#
# @param project_id [String, Integer]
# @return [String]
def get_project_api(project_id)
  format("http://hstar-hi.alicdn.com/dream/ajax/getProjectForDetail.htm?id=%s", project_id)
end
69
+
70
# Builds the list-API URL for one page of projects (page size fixed at 20).
#
# @param page [Integer] page number
# @param status [Integer] numeric API status
# @return [String]
def get_projects_page_api(page = 1, status = 1)
  format(
    "http://hstar-hi.alicdn.com/dream/ajax/getProjectList.htm?page=%s&pageSize=20&projectType=&type=6&status=%s",
    page, status
  )
end
73
+
74
# Pulls the project id out of a Taobao detail URL.
#
# @param project_url [String] URL expected to contain an "id=" marker
# @return [String, nil] the text following the last "id="; when the marker
#   is absent the whole string comes back, and an empty string yields nil
def get_id(project_url)
  pieces = project_url.split("id=")
  pieces[-1]
end
77
+
78
# Declares the per-project field extractors for Taobao through the
# MethodBuilder DSL (defined outside this file). Blocks taking `result`
# index it with string keys under "data", so `result` is presumably the
# decoded project-detail JSON -- confirm against MethodBuilder.
MethodBuilder.set_methods do
  insert_parser "Taobao"

  set_method :get_title do |result|
    result["data"]["name"]
  end

  set_method :get_creator_name do |result|
    raw_creator_name = result["data"]["person"]["name"]
    @parser.encode_gbk_to_utf(raw_creator_name)
  end

  set_method :get_summary do |result|
    raw_summary = result["data"]["desc"]
    @parser.encode_gbk_to_utf(raw_summary)
  end

  set_method :get_start_date do |result|
    result["data"]["begin_date"]
  end

  set_method :get_end_date do |result|
    result["data"]["end_date"]
  end

  set_method :get_region do |result|
    "China"
  end

  set_method :get_money_goal do |result|
    result["data"]["target_money"]
  end

  set_method :get_money_pledged do |result|
    result["data"]["curr_money"]
  end

  set_method :get_backer_count do |result|
    result["data"]["support_person"]
  end

  set_method :get_left_time do |result|
    result["data"]["remain_day"]
  end

  # Zero remaining days means finished. Compare through to_s so a numeric 0
  # decoded from JSON is recognised as well as the string "0".
  set_method :get_status do |left_time|
    if left_time.to_s == "0"
      "finished"
    else
      "online"
    end
  end

  set_method :get_following_count do |result|
    result["data"]["focus_count"]
  end

  set_method :get_currency_string do |result|
    "cny"
  end

  # Converts GB2312 bytes to UTF-8, skipping unconvertible sequences. On a
  # conversion error the message is printed and the input is returned
  # unchanged. Only StandardError is rescued, so interrupts and exit
  # requests are no longer swallowed here.
  set_method :encode_gbk_to_utf do |string|
    begin
      Iconv.conv("utf-8//ignore", "gb2312//ignore", string)
    rescue StandardError => e
      puts e
      string
    end
  end
end
148
+ end
149
+ end
150
+ end