crowd_funding_parser 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 431eb1ced64d76041c587114664d712ad213869a
4
- data.tar.gz: 2d0b7cb076f446a83b41171fb9f0af98e7d1a4ad
3
+ metadata.gz: a49010d36a36cab50f0c001bb1d368934c1126a3
4
+ data.tar.gz: 177a84fa3773c046fd6e4488c3470aae83c25cef
5
5
  SHA512:
6
- metadata.gz: 27b2259f0552a50fda78ed86047a9ef3b5f7f2d28c9be8f10feed1d1106cdfe756d25cfc0a14989c88f80dfe3ef037b675ec86208e98a0b7265df98a5ba92f52
7
- data.tar.gz: 62eede16e512955c3b3e0c8616402d8cbd8ff4938e6b5a6532b6a7064018b49e12c71ec1b561573b3611d4752822568f1fe8d58190468dafbd2f73fd45f4e394
6
+ metadata.gz: 5e3129df45328fb30f828ce77de424813926dae5d169da8ca66e0e3c55dcf5f5590e02bebf2810be137c78d2e2fbe59b251250b453e39c064678a9422045392a
7
+ data.tar.gz: 028b83e89377e5c631893be152b902e4f6430c187d43d5fedc5e4e18b67d0dfdfc80417c0d66e39d25a9026f51ef5432bc0cdd3d188fe6e376b680e3d4fb94aa
data/.gitignore CHANGED
@@ -15,4 +15,4 @@ mkmf.log
15
15
  .DS_Store
16
16
  */.DS_Store
17
17
  .DS_Store?
18
- *.gem
18
+ *.gem
@@ -0,0 +1,17 @@
1
+ language: ruby
2
+ cache: bundler
3
+
4
+ rvm:
5
+ - 2.1.2
6
+
7
+ script: 'bundle exec rspec'
8
+
9
+ notifications:
10
+ slack:
11
+ rooms:
12
+ - backer-founder:gS9GQrOqRKRogUEU5ajHYFZ9#crowd-trail
13
+ email:
14
+ recipients:
15
+ - stan001212@gmail.com
16
+ on_failure: change
17
+ on_success: never
data/Gemfile CHANGED
@@ -2,3 +2,5 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in crowd_funding_parser.gemspec
4
4
  gemspec
5
+
6
+ gem "pry-rails"
@@ -0,0 +1,11 @@
1
# Guard configuration: re-run specs through `bundle exec rspec` when files change.
guard 'rspec', cmd: "bundle exec rspec" do
  # A change to lib/<path>.rb runs spec/<path>_spec.rb.
  # The dot before "rb" is escaped so only real ".rb" files match.
  watch(%r{^lib/(.+)\.rb$}) do |m|
    "spec/#{m[1]}_spec.rb"
  end

  # A change to any Ruby file under spec/ re-runs that file itself.
  watch(%r{^spec/(.+)\.rb$}) do |m|
    "spec/#{m[1]}.rb"
  end
end
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Build Status](https://travis-ci.org/BackerFounder/crowd_funding_parser.svg)](https://travis-ci.org/BackerFounder/crowd_funding_parser)
2
+
1
3
  # CrowdFundingParser
2
4
 
3
5
  TODO: Write a gem description
data/Rakefile CHANGED
@@ -1,2 +1,10 @@
1
+ require "rspec/core/rake_task"
1
2
  require "bundler/gem_tasks"
3
+ # Default directory to look in is `spec/`
4
+ # Run with `rake spec`
5
# Defines the `spec` rake task.
# The "documentation" formatter name is used because the older "nested" alias
# is not accepted by RSpec 3, and the gemspec does not pin an RSpec version.
RSpec::Core::RakeTask.new(:spec) do |task|
  task.rspec_opts = ['--color', '--format', 'documentation']
end
8
+
9
+ task default: :spec
2
10
 
@@ -18,8 +18,20 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
+ spec.add_development_dependency "webmock"
22
+ spec.add_development_dependency "iconv"
23
+ spec.add_development_dependency "activesupport"
24
+ spec.add_development_dependency "vcr", "~> 2.9.3"
25
+ spec.add_development_dependency "rspec"
26
+ spec.add_development_dependency "rspec-nc"
27
+ spec.add_development_dependency "guard"
28
+ spec.add_development_dependency "guard-rspec"
29
+ spec.add_development_dependency "pry"
30
+ spec.add_development_dependency "pry-remote"
31
+ spec.add_development_dependency "pry-nav"
21
32
  spec.add_development_dependency "bundler", "~> 1.6"
22
33
  spec.add_development_dependency "rake", "~> 10.0"
23
34
  spec.add_runtime_dependency "parallel", "~> 1.3"
24
35
  spec.add_runtime_dependency "nokogiri", "~> 1.6"
36
+ spec.add_runtime_dependency "httparty", "~> 0.13.3"
25
37
  end
@@ -4,3 +4,7 @@ require "crowd_funding_parser/general"
4
4
  require "crowd_funding_parser/parser/flyingv"
5
5
  require "crowd_funding_parser/parser/webackers"
6
6
  require "crowd_funding_parser/parser/zeczec"
7
+ require "crowd_funding_parser/parser/taobao"
8
+ require "crowd_funding_parser/parser/kickstarter"
9
+ require "crowd_funding_parser/parser/hereo"
10
+ require "crowd_funding_parser/parser/an9"
@@ -1,50 +1,53 @@
1
+ require 'httparty'
2
+ require "active_support/all"
3
+ require_relative "method_builder"
4
+
1
5
  module CrowdFundingParser
2
6
  module Parser
3
7
  class General
4
- def parse_tracking_data(doc, rel_url)
5
- project_url = @url + rel_url
8
+ include HTTParty
9
+
10
# Builds the "tracking" part of a project record from a fetched result.
#
# Each value comes from the matching get_* extractor; numeric fields are
# coerced with #to_i. The status is derived from the already-extracted
# "left_time" value. +project_url+ is accepted but not used here.
#
# Returns a Hash with string keys.
def parse_tracking_data(result, project_url)
  tracking = {
    "money_goal"    => get_money_goal(result).to_i,
    "money_pledged" => get_money_pledged(result).to_i,
    "backer_count"  => get_backer_count(result).to_i,
    "left_time"     => get_left_time(result)
  }
  # Status depends on the left_time computed just above.
  tracking["status"] = get_status(tracking["left_time"])
  tracking["fb_count"] = get_fb_count(result).to_i
  tracking["following_count"] = get_following_count(result).to_i
  tracking
end
17
21
 
18
- def parse_content_data(doc, rel_url)
19
- project_url = @url + rel_url
20
- project = Hash.new
21
- project['platform_project_id'] = get_id(rel_url)
22
- project['title'] = get_title(doc)
23
- project['url'] = project_url
24
- project['summary'] = get_summary(doc)
25
- project['category'] = get_category(doc)
26
- project['creator_name'] = get_creator_name(doc)
27
- project['creator_id'] = get_creator_id(doc)
28
- project['creator_link'] = get_creator_link(doc)
22
# Builds the descriptive ("content") part of a project record.
#
# The project id is derived from +project_url+ via get_id; every other field
# except "url" is produced by the matching get_* extractor applied to +result+.
#
# Returns a Hash with string keys.
def parse_content_data(result, project_url)
  {
    "platform_project_id" => get_id(project_url),
    "title"               => get_title(result),
    "url"                 => project_url,
    "summary"             => get_summary(result),
    "category"            => get_category(result),
    "creator_name"        => get_creator_name(result),
    "creator_id"          => get_creator_id(result),
    "creator_link"        => get_creator_link(result),
    "currency_string"     => get_currency_string(result),
    "start_date"          => get_start_date(result),
    "end_date"            => get_end_date(result),
    "region"              => get_region(result)
  }
end
31
38
 
32
39
  def get_project_links(required_status = "online")
33
40
  links = []
34
-
35
- @targets.each do |target|
41
+
42
+ get_lists.each do |target|
36
43
  doc = Nokogiri::HTML(target)
37
44
  online_projects = doc.css(@item_css_class)
38
45
 
39
46
  Parallel.map(online_projects, in_processes: 2 , in_threads: 4) do |project|
40
47
  link_nodes = project.css("a:nth-child(1)")
41
48
  status = get_status(get_string(project.css(@status_css_class)))
42
- link = link_nodes.first["href"]
43
- if status == "finished" && required_status == "finished"
44
- links << link
45
- elsif status == "online" && required_status == "online"
46
- links << link
47
- elsif status == "preparing" && required_status == "preparing"
49
+ link = @platform_url + link_nodes.first["href"]
50
+ if status == required_status
48
51
  links << link
49
52
  end
50
53
  end
@@ -53,42 +56,49 @@ module CrowdFundingParser
53
56
  links
54
57
  end
55
58
 
56
- def get_project_log(url)
57
- url.gsub!("#{@url}", "")
58
- parse_tracking_data(url)
59
# Fetches the raw result for a project.
#
# When @parse_method is :json, the project id is extracted from the URL,
# turned into an API URL with get_project_api, and fetched as JSON.
# For any other @parse_method value the project page itself is fetched
# through get_doc_through_url.
def get_result(project_url)
  return get_doc_through_url(project_url) unless @parse_method == :json

  api_url = get_project_api(get_id(project_url))
  get_json_through_url(api_url)
end
68
+
69
# Requests +project_url+ with HTTParty.get and passes the response to
# Nokogiri::HTML, returning whatever Nokogiri::HTML returns.
def get_doc_through_url(project_url)
  Nokogiri::HTML(HTTParty.get(project_url))
end
60
73
 
61
- def get_project_content(url)
62
- url.gsub!("#{@url}", "")
63
- parse_content_data(url)
74
# Requests +project_url+ with HTTParty.get and parses the response body as JSON.
#
# The body comes from a remote server, so it is parsed with JSON.parse rather
# than JSON.load (JSON.load is intended for trusted input only).
#
# Returns the parsed JSON value, or nil when the body is nil or empty
# (the same result JSON.load gave for a blank source).
# Raises JSON::ParserError when the body is not valid JSON.
def get_json_through_url(project_url)
  body = HTTParty.get(project_url).body
  return nil if body.nil? || body.empty?

  JSON.parse(body)
end
65
78
 
66
- def get_full_project(url)
67
- get_project_content(url).merge(get_project_log(url))
79
# Fetches a project once and returns its full record: the content data
# merged with the tracking data (tracking values win on key collisions,
# as with Hash#merge).
def get_project(project_url)
  fetched = get_result(project_url)
  content = parse_content_data(fetched, project_url)
  tracking = parse_tracking_data(fetched, project_url)
  content.merge(tracking)
end
69
83
 
70
- def get_doc_through_url(rel_url)
71
- project_url = @url + rel_url
72
- project_html = open(project_url)
73
- Nokogiri::HTML(project_html)
84
# Extracts the project id from a project URL: the last "/"-separated
# segment of the relative URL, with anything from the first "?" onward removed.
def get_id(project_url)
  last_segment = get_rel_url(project_url).split("/").last
  last_segment.split("?").first
end
75
88
 
76
89
  private
77
90
 
78
- def get_string(elements)
79
- elements.first.text.strip
91
# Returns +url+ with every occurrence of @platform_url removed.
# When @platform_url is nil the URL is returned unchanged (as a new String).
def get_rel_url(url)
  url.gsub(@platform_url.to_s, "")
end
81
94
 
82
- def money_string(money)
83
- money.sub('$', '').sub(',', '').sub('NT', "")
84
- end
85
-
86
- def convert_time(left_time)
87
- days = ((left_time / (60 * 60 * 24))).to_i
88
- hours = ((left_time / (60 * 60)) % 24).to_i
89
- minutes = ((left_time / 60) % 60).to_i
90
- "#{days}天#{hours}小時#{minutes}分鐘"
95
# Fallback for extractors a parser does not implement: any missing method
# whose name starts with "get_" returns an empty string.
#
# The check is anchored to the "get_" prefix so that unrelated missing
# methods whose names merely contain "get" (e.g. a typo such as `targets`)
# still raise NoMethodError instead of silently returning "".
def method_missing(m, *args, &block)
  return "" if m.to_s.start_with?("get_")

  super
end

# Keeps respond_to? consistent with the method_missing fallback above.
def respond_to_missing?(m, include_private = false)
  m.to_s.start_with?("get_") || super
end
92
102
  end
93
103
  end
94
- end
104
+ end
@@ -0,0 +1,47 @@
1
# Small DSL for attaching extractor methods to a parser class.
module MethodBuilder
  # Evaluates +block+ with a fresh ParserMethodProxy as self and returns the
  # block's value.
  def self.set_methods(&block)
    ParserMethodProxy.new.instance_eval(&block)
  end

  # Receiver of the DSL calls made inside MethodBuilder.set_methods.
  class ParserMethodProxy
    # Resolves CrowdFundingParser::Parser::<inserted_class> (via String#constantize)
    # and remembers both the class and a new instance of it.
    def insert_parser(inserted_class)
      @parser_class = "CrowdFundingParser::Parser::#{inserted_class}".constantize
      @parser = @parser_class.new
    end

    # Runs +block+ immediately and returns its value.
    def set_variable(&block)
      block.call
    end

    # Defines +method_name+ on the parser class as a one-argument method that
    # calls +block+ with that argument.
    #
    # Any StandardError raised by +block+ is reported on stdout and replaced
    # by an empty-string return value. Non-StandardError exceptions
    # (Interrupt, SystemExit, NoMemoryError, ...) are deliberately NOT
    # rescued so the process can still be stopped.
    def set_method(method_name, &block)
      @parser_class.send(:define_method, method_name) do |arg|
        begin
          block.call(arg)
        rescue StandardError => e
          puts "Error #{e.message}"
          puts e.backtrace.first
          ""
        end
      end
    end

    # Any unknown DSL call evaluates to an empty string.
    def method_missing(m, *args, &block)
      ""
    end

    # Returns the stripped text of the first element of +elements+.
    def get_string(elements)
      elements.first.text.strip
    end

    # Removes every "$", "," and "NT" from +money+ (in that order).
    def money_string(money)
      money.gsub("$","").gsub(",", "").gsub("NT", "")
    end

    # Formats a duration given in seconds as whole days, hours and minutes.
    def convert_time(left_time)
      days = ((left_time / (60 * 60 * 24))).to_i
      hours = ((left_time / (60 * 60)) % 24).to_i
      minutes = ((left_time / 60) % 60).to_i
      "#{days}天#{hours}小時#{minutes}分鐘"
    end
  end
end
@@ -0,0 +1,196 @@
1
+ require "json"
2
+ require 'open-uri'
3
+ require "iconv"
4
+
5
+ module CrowdFundingParser
6
+ module Parser
7
# Parser for projects on an9.com.tw.
#
# NOTE(review): this class never assigns @parse_method or @platform_url, yet
# the extractors below branch on @parse_method and General#get_rel_url reads
# @platform_url. With @parse_method unset every extractor takes its non-:doc
# branch. Confirm the intended configuration before relying on these methods.
class An9 < General
  def initialize
    @url = "http://www.an9.com.tw/Dream/"
    @status_css_class = ".sideCon>a"
  end

  # Returns the categories whose :parent_id is nil.
  def get_main_categories(add_categories)
    add_categories.select { |c| c[:parent_id].nil? }
  end

  # Scans project pages @url + "1" .. @url + "100000" and returns the links
  # whose status equals +required_status+.
  #
  # The scan stops early (Parallel::Break) once 50 "not found" pages or 50
  # errors have accumulated since the last successfully parsed page.
  def get_project_links(required_status = "online")
    links = []
    error_count = 0
    not_found_count = 0
    # Worker threads share `links` and the counters with this method, which
    # is why a thread pool is used. The original also passed a misspelled
    # `in_precesses` key, which is not a Parallel option.
    Parallel.each(1..100000, in_threads: 5, progress: "Get #{self} links") do |i|
      begin
        link = @url + i.to_s
        project = get_doc_through_url(link)
        not_found_message = project.css(".actMsg p")
        if not_found_message.present? && get_string(not_found_message).match(/不存在/)
          not_found_count += 1
        else
          status = get_status(get_string(project.css(@status_css_class)))
          links << link if status == required_status
          not_found_count = 0
          error_count = 0
        end
      rescue StandardError
        # Only ordinary errors are counted; Interrupt/SystemExit propagate.
        error_count += 1
      end

      # Checked on every iteration (and outside the rescue, so the Break is
      # not swallowed): a run of missing pages must stop the scan just like
      # a run of errors.
      raise Parallel::Break if not_found_count >= 50 || error_count >= 50
    end

    links
  end

  private

  def get_title(result)
    if @parse_method == :doc
      get_string(result.css(".NS_projects__header h2 .green-dark"))
    else
      result["name"]
    end
  end

  def get_category(result)
    if @parse_method == :doc
      get_string(result.css(".container-flex .h5 a.grey-dark:nth-child(2) b"))
    else
      result["category"]["name"]
    end
  end

  def get_creator_name(result)
    if @parse_method == :doc
      get_string(result.css(".NS_projects__creator .col-8>h5 a.remote_modal_dialog"))
    else
      result["creator"]["name"]
    end
  end

  # In :doc mode the id is the third-from-last "/" segment of the creator link.
  def get_creator_id(result)
    if @parse_method == :doc
      creator_link = result.css(".NS_projects__creator .col-8>h5 a.remote_modal_dialog").first["href"]
      creator_link.split("/")[-3]
    else
      result["creator"]["id"]
    end
  end

  def get_creator_link(result)
    if @parse_method == :doc
      @url + result.css(".NS_projects__creator .col-8>h5 a.remote_modal_dialog").first["href"]
    else
      result["creator"]["urls"]["web"]["user"]
    end
  end

  def get_summary(result)
    if @parse_method == :doc
      get_string(result.css(".container-flex .col-8 .mobile-hide p.h3.mb3"))
    else
      result["blurb"]
    end
  end

  # Returns nil in :doc mode (no start date on page).
  def get_start_date(result)
    if @parse_method == :doc
      # no start date on page
    else
      Time.at(result["launched_at"])
    end
  end

  def get_end_date(result)
    if @parse_method == :doc
      result.css(".NS_projects__deadline_copy p.grey-dark time[datetime]")[0]["datetime"]
    else
      Time.at(result["deadline"])
    end
  end

  def get_region(result)
    if @parse_method == :doc
      get_string(result.css(".container-flex .h5 a.grey-dark:nth-child(1) b"))
    else
      result["location"]["displayable_name"]
    end
  end

  # for tracking

  def get_money_goal(result)
    if @parse_method == :doc
      result.css("div[data-pledged]").first["data-goal"]
    else
      result["goal"]
    end
  end

  def get_money_pledged(result)
    if @parse_method == :doc
      result.css("div[data-pledged]").first["data-pledged"]
    else
      result["pledged"]
    end
  end

  def get_backer_count(result)
    if @parse_method == :doc
      result.css("div[data-backers-count]").first["data-backers-count"]
    else
      result["backers_count"]
    end
  end

  # Returns the number of whole days left as "<n>天", or "已結束" when
  # no whole day is left.
  def get_left_time(result)
    last_seconds =
      if @parse_method == :doc
        end_date = result.css("div[data-end_time]").first["data-end_time"]
        Time.parse(end_date) - Time.now
      else
        result["deadline"].to_i - Time.now.to_i
      end
    last_day = last_seconds.to_i / 86400
    if last_day <= 0
      "已結束"
    else
      last_day.to_s + "天"
    end
  end

  # Maps the text of the page's action button to a status string;
  # unrecognised text is treated as "online".
  def get_status(button_text)
    case button_text
    when /贊助/
      "online"
    when /喜歡/
      "voting"
    when /結束|成功/
      "finished"
    else
      "online"
    end
  end

  # Not implemented for this platform; returns nil.
  def get_fb_count(result)

  end

  # Not implemented for this platform; returns nil.
  def get_following_count(result)

  end

  def get_backer_list(project_url)
    []
  end

  def get_currency_string(result)
    if @parse_method == :doc
      result.css("data[data-currency]")[0]["data-currency"]
    else
      result["currency"]
    end
  end
end
195
+ end
196
+ end