crowd_funding_parser 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 431eb1ced64d76041c587114664d712ad213869a
4
- data.tar.gz: 2d0b7cb076f446a83b41171fb9f0af98e7d1a4ad
3
+ metadata.gz: a49010d36a36cab50f0c001bb1d368934c1126a3
4
+ data.tar.gz: 177a84fa3773c046fd6e4488c3470aae83c25cef
5
5
  SHA512:
6
- metadata.gz: 27b2259f0552a50fda78ed86047a9ef3b5f7f2d28c9be8f10feed1d1106cdfe756d25cfc0a14989c88f80dfe3ef037b675ec86208e98a0b7265df98a5ba92f52
7
- data.tar.gz: 62eede16e512955c3b3e0c8616402d8cbd8ff4938e6b5a6532b6a7064018b49e12c71ec1b561573b3611d4752822568f1fe8d58190468dafbd2f73fd45f4e394
6
+ metadata.gz: 5e3129df45328fb30f828ce77de424813926dae5d169da8ca66e0e3c55dcf5f5590e02bebf2810be137c78d2e2fbe59b251250b453e39c064678a9422045392a
7
+ data.tar.gz: 028b83e89377e5c631893be152b902e4f6430c187d43d5fedc5e4e18b67d0dfdfc80417c0d66e39d25a9026f51ef5432bc0cdd3d188fe6e376b680e3d4fb94aa
data/.gitignore CHANGED
@@ -15,4 +15,4 @@ mkmf.log
15
15
  .DS_Store
16
16
  */.DS_Store
17
17
  .DS_Store?
18
- *.gem
18
+ *.gem
@@ -0,0 +1,17 @@
1
+ language: ruby
2
+ cache: bundler
3
+
4
+ rvm:
5
+ - 2.1.2
6
+
7
+ script: 'bundle exec rspec'
8
+
9
+ notifications:
10
+ slack:
11
+ rooms:
12
+ - backer-founder:gS9GQrOqRKRogUEU5ajHYFZ9#crowd-trail
13
+ email:
14
+ recipients:
15
+ - stan001212@gmail.com
16
+ on_failure: change
17
+ on_success: never
data/Gemfile CHANGED
@@ -2,3 +2,5 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in crowd_funding_parser.gemspec
4
4
  gemspec
5
+
6
+ gem "pry-rails"
@@ -0,0 +1,11 @@
1
+ guard 'rspec', cmd: "bundle exec rspec" do
2
+ # watch /lib/ files
3
+ watch(%r{^lib/(.+).rb$}) do |m|
4
+ "spec/#{m[1]}_spec.rb"
5
+ end
6
+
7
+ # watch /spec/ files
8
+ watch(%r{^spec/(.+).rb$}) do |m|
9
+ "spec/#{m[1]}.rb"
10
+ end
11
+ end
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Build Status](https://travis-ci.org/BackerFounder/crowd_funding_parser.svg)](https://travis-ci.org/BackerFounder/crowd_funding_parser)
2
+
1
3
  # CrowdFundingParser
2
4
 
3
5
  TODO: Write a gem description
data/Rakefile CHANGED
@@ -1,2 +1,10 @@
1
+ require "rspec/core/rake_task"
1
2
  require "bundler/gem_tasks"
3
+ # Default directory to look in is `/specs`
4
+ # Run with `rake spec`
5
+ RSpec::Core::RakeTask.new(:spec) do |task|
6
+ task.rspec_opts = ['--color', '--format', 'nested']
7
+ end
8
+
9
+ task default: :spec
2
10
 
@@ -18,8 +18,20 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
+ spec.add_development_dependency "webmock"
22
+ spec.add_development_dependency "iconv"
23
+ spec.add_development_dependency "activesupport"
24
+ spec.add_development_dependency "vcr", "~> 2.9.3"
25
+ spec.add_development_dependency "rspec"
26
+ spec.add_development_dependency "rspec-nc"
27
+ spec.add_development_dependency "guard"
28
+ spec.add_development_dependency "guard-rspec"
29
+ spec.add_development_dependency "pry"
30
+ spec.add_development_dependency "pry-remote"
31
+ spec.add_development_dependency "pry-nav"
21
32
  spec.add_development_dependency "bundler", "~> 1.6"
22
33
  spec.add_development_dependency "rake", "~> 10.0"
23
34
  spec.add_runtime_dependency "parallel", "~> 1.3"
24
35
  spec.add_runtime_dependency "nokogiri", "~> 1.6"
36
+ spec.add_runtime_dependency "httparty", "~> 0.13.3"
25
37
  end
@@ -4,3 +4,7 @@ require "crowd_funding_parser/general"
4
4
  require "crowd_funding_parser/parser/flyingv"
5
5
  require "crowd_funding_parser/parser/webackers"
6
6
  require "crowd_funding_parser/parser/zeczec"
7
+ require "crowd_funding_parser/parser/taobao"
8
+ require "crowd_funding_parser/parser/kickstarter"
9
+ require "crowd_funding_parser/parser/hereo"
10
+ require "crowd_funding_parser/parser/an9"
@@ -1,50 +1,53 @@
1
+ require 'httparty'
2
+ require "active_support/all"
3
+ require_relative "method_builder"
4
+
1
5
  module CrowdFundingParser
2
6
  module Parser
3
7
  class General
4
- def parse_tracking_data(doc, rel_url)
5
- project_url = @url + rel_url
8
+ include HTTParty
9
+
10
+ def parse_tracking_data(result, project_url)
6
11
  project = Hash.new
7
- project['money_goal'] = get_money_goal(doc).to_i
8
- project['money_pledged'] = get_money_pledged(doc).to_i
9
- project['backer_count'] = get_backer_count(doc).to_i
10
- project['last_time'] = get_last_time(doc)
11
- project['status'] = get_status(project['last_time'])
12
- # project['backer_list'] = get_backer_list(project_url)
13
- project['fb_count'] = get_fb_count(doc).to_i
14
- project['following_count'] = get_following_count(doc).to_i
12
+ project["money_goal"] = get_money_goal(result).to_i
13
+ project["money_pledged"] = get_money_pledged(result).to_i
14
+ project["backer_count"] = get_backer_count(result).to_i
15
+ project["left_time"] = get_left_time(result)
16
+ project["status"] = get_status(project["left_time"])
17
+ project["fb_count"] = get_fb_count(result).to_i
18
+ project["following_count"] = get_following_count(result).to_i
15
19
  project
16
20
  end
17
21
 
18
- def parse_content_data(doc, rel_url)
19
- project_url = @url + rel_url
20
- project = Hash.new
21
- project['platform_project_id'] = get_id(rel_url)
22
- project['title'] = get_title(doc)
23
- project['url'] = project_url
24
- project['summary'] = get_summary(doc)
25
- project['category'] = get_category(doc)
26
- project['creator_name'] = get_creator_name(doc)
27
- project['creator_id'] = get_creator_id(doc)
28
- project['creator_link'] = get_creator_link(doc)
22
+ def parse_content_data(result, project_url)
23
+ project = Hash.new
24
+ project["platform_project_id"] = get_id(project_url)
25
+ project["title"] = get_title(result)
26
+ project["url"] = project_url
27
+ project["summary"] = get_summary(result)
28
+ project["category"] = get_category(result)
29
+ project["creator_name"] = get_creator_name(result)
30
+ project["creator_id"] = get_creator_id(result)
31
+ project["creator_link"] = get_creator_link(result)
32
+ project["currency_string"] = get_currency_string(result)
33
+ project["start_date"] = get_start_date(result)
34
+ project["end_date"] = get_end_date(result)
35
+ project["region"] = get_region(result)
29
36
  project
30
37
  end
31
38
 
32
39
  def get_project_links(required_status = "online")
33
40
  links = []
34
-
35
- @targets.each do |target|
41
+
42
+ get_lists.each do |target|
36
43
  doc = Nokogiri::HTML(target)
37
44
  online_projects = doc.css(@item_css_class)
38
45
 
39
46
  Parallel.map(online_projects, in_processes: 2 , in_threads: 4) do |project|
40
47
  link_nodes = project.css("a:nth-child(1)")
41
48
  status = get_status(get_string(project.css(@status_css_class)))
42
- link = link_nodes.first["href"]
43
- if status == "finished" && required_status == "finished"
44
- links << link
45
- elsif status == "online" && required_status == "online"
46
- links << link
47
- elsif status == "preparing" && required_status == "preparing"
49
+ link = @platform_url + link_nodes.first["href"]
50
+ if status == required_status
48
51
  links << link
49
52
  end
50
53
  end
@@ -53,42 +56,49 @@ module CrowdFundingParser
53
56
  links
54
57
  end
55
58
 
56
- def get_project_log(url)
57
- url.gsub!("#{@url}", "")
58
- parse_tracking_data(url)
59
+ def get_result(project_url)
60
+ if @parse_method == :json
61
+ project_id = get_id(project_url)
62
+ project_api = get_project_api(project_id)
63
+ get_json_through_url(project_api)
64
+ else
65
+ get_doc_through_url(project_url)
66
+ end
67
+ end
68
+
69
+ def get_doc_through_url(project_url)
70
+ project_html = HTTParty.get(project_url)
71
+ Nokogiri::HTML(project_html)
59
72
  end
60
73
 
61
- def get_project_content(url)
62
- url.gsub!("#{@url}", "")
63
- parse_content_data(url)
74
+ def get_json_through_url(project_url)
75
+ httparty_url = HTTParty.get(project_url)
76
+ json = JSON.load(httparty_url.body)
64
77
  end
65
78
 
66
- def get_full_project(url)
67
- get_project_content(url).merge(get_project_log(url))
79
+ def get_project(project_url)
80
+ result = get_result(project_url)
81
+ parse_content_data(result, project_url).merge parse_tracking_data(result, project_url)
68
82
  end
69
83
 
70
- def get_doc_through_url(rel_url)
71
- project_url = @url + rel_url
72
- project_html = open(project_url)
73
- Nokogiri::HTML(project_html)
84
+ def get_id(project_url)
85
+ rel_url = get_rel_url(project_url)
86
+ rel_url.split("/").last.split("?").first
74
87
  end
75
88
 
76
89
  private
77
90
 
78
- def get_string(elements)
79
- elements.first.text.strip
91
+ def get_rel_url(url)
92
+ url.gsub("#{@platform_url}", "")
80
93
  end
81
94
 
82
- def money_string(money)
83
- money.sub('$', '').sub(',', '').sub('NT', "")
84
- end
85
-
86
- def convert_time(left_time)
87
- days = ((left_time / (60 * 60 * 24))).to_i
88
- hours = ((left_time / (60 * 60)) % 24).to_i
89
- minutes = ((left_time / 60) % 60).to_i
90
- "#{days}天#{hours}小時#{minutes}分鐘"
95
+ def method_missing(m, *args, &block)
96
+ if m.to_s.match(/get/)
97
+ ""
98
+ else
99
+ super
100
+ end
91
101
  end
92
102
  end
93
103
  end
94
- end
104
+ end
@@ -0,0 +1,47 @@
1
+ module MethodBuilder
2
+ def self.set_methods(&block)
3
+ ParserMethodProxy.new.instance_eval(&block)
4
+ end
5
+
6
+ class ParserMethodProxy
7
+ def insert_parser(inserted_class)
8
+ @parser_class = "CrowdFundingParser::Parser::#{inserted_class}".constantize
9
+ @parser = @parser_class.new
10
+ end
11
+
12
+ def set_variable(&block)
13
+ block.call
14
+ end
15
+
16
+ def set_method(method_name, &block)
17
+ @parser_class.send(:define_method, method_name) do |arg|
18
+ begin
19
+ block.call(arg)
20
+ rescue Exception => e
21
+ puts "Error #{e.message}"
22
+ puts e.backtrace.first
23
+ ""
24
+ end
25
+ end
26
+ end
27
+
28
+ def method_missing(m, *args, &block)
29
+ ""
30
+ end
31
+
32
+ def get_string(elements)
33
+ elements.first.text.strip
34
+ end
35
+
36
+ def money_string(money)
37
+ money.gsub("$","").gsub(",", "").gsub("NT", "")
38
+ end
39
+
40
+ def convert_time(left_time)
41
+ days = ((left_time / (60 * 60 * 24))).to_i
42
+ hours = ((left_time / (60 * 60)) % 24).to_i
43
+ minutes = ((left_time / 60) % 60).to_i
44
+ "#{days}天#{hours}小時#{minutes}分鐘"
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,196 @@
1
+ require "json"
2
+ require 'open-uri'
3
+ require "iconv"
4
+
5
+ module CrowdFundingParser
6
+ module Parser
7
+ class An9 < General
8
+ def initialize
9
+ @url = "http://www.an9.com.tw/Dream/"
10
+ @status_css_class = ".sideCon>a"
11
+ end
12
+
13
+ def get_main_categories(add_categories)
14
+ add_categories.select { |c| c[:parent_id].nil? }
15
+ end
16
+
17
+ def get_project_links(required_status = "online")
18
+ links = []
19
+ error_count = 0
20
+ not_found_count = 0
21
+ Parallel.each(1..100000, in_precesses: 2, in_threads: 5, progress: "Get #{self} links") do |i|
22
+ begin
23
+ link = @url + i.to_s
24
+ project = get_doc_through_url(link)
25
+ not_found_message = project.css(".actMsg p")
26
+ if not_found_message.present? && get_string(not_found_message).match(/不存在/)
27
+ not_found_count += 1
28
+ else
29
+ status = get_status(get_string(project.css(@status_css_class)))
30
+
31
+ if status == required_status
32
+ links << link
33
+ end
34
+ not_found_count = 0
35
+ error_count = 0
36
+ end
37
+ rescue Exception => e
38
+ error_count += 1
39
+ raise Parallel::Break if not_found_count >= 50 || error_count >= 50
40
+ end
41
+ end
42
+
43
+ links
44
+ end
45
+
46
+ private
47
+
48
+ def get_title(result)
49
+ if @parse_method == :doc
50
+ get_string(result.css(".NS_projects__header h2 .green-dark"))
51
+ else
52
+ result["name"]
53
+ end
54
+ end
55
+
56
+ def get_category(result)
57
+ if @parse_method == :doc
58
+ get_string(result.css(".container-flex .h5 a.grey-dark:nth-child(2) b"))
59
+ else
60
+ result["category"]["name"]
61
+ end
62
+ end
63
+
64
+ def get_creator_name(result)
65
+ if @parse_method == :doc
66
+ get_string(result.css(".NS_projects__creator .col-8>h5 a.remote_modal_dialog"))
67
+ else
68
+ result["creator"]["name"]
69
+ end
70
+ end
71
+
72
+ def get_creator_id(result)
73
+ if @parse_method == :doc
74
+ creator_link = result.css(".NS_projects__creator .col-8>h5 a.remote_modal_dialog").first["href"]
75
+ creator_link.split("/")[-3]
76
+ else
77
+ result["creator"]["id"]
78
+ end
79
+ end
80
+
81
+ def get_creator_link(result)
82
+ if @parse_method == :doc
83
+ creator_link = @url + result.css(".NS_projects__creator .col-8>h5 a.remote_modal_dialog").first["href"]
84
+ else
85
+ result["creator"]["urls"]["web"]["user"]
86
+ end
87
+ end
88
+
89
+ def get_summary(result)
90
+ if @parse_method == :doc
91
+ get_string(result.css(".container-flex .col-8 .mobile-hide p.h3.mb3"))
92
+ else
93
+ result["blurb"]
94
+ end
95
+ end
96
+
97
+ def get_start_date(result)
98
+ if @parse_method == :doc
99
+ # no start date on page
100
+ else
101
+ Time.at(result["launched_at"])
102
+ end
103
+ end
104
+
105
+ def get_end_date(result)
106
+ if @parse_method == :doc
107
+ result.css(".NS_projects__deadline_copy p.grey-dark time[datetime]")[0]["datetime"]
108
+ else
109
+ time = Time.at(result["deadline"])
110
+ end
111
+ end
112
+
113
+ def get_region(result)
114
+ if @parse_method == :doc
115
+ get_string(result.css(".container-flex .h5 a.grey-dark:nth-child(1) b"))
116
+ else
117
+ result["location"]["displayable_name"]
118
+ end
119
+ end
120
+
121
+ # for tracking
122
+
123
+ def get_money_goal(result)
124
+ if @parse_method == :doc
125
+ result.css("div[data-pledged]").first["data-goal"]
126
+ else
127
+ result["goal"]
128
+ end
129
+ end
130
+
131
+ def get_money_pledged(result)
132
+ if @parse_method == :doc
133
+ result.css("div[data-pledged]").first["data-pledged"]
134
+ else
135
+ result["pledged"]
136
+ end
137
+ end
138
+
139
+ def get_backer_count(result)
140
+ if @parse_method == :doc
141
+ result.css("div[data-backers-count]").first["data-backers-count"]
142
+ else
143
+ result["backers_count"]
144
+ end
145
+ end
146
+
147
+ def get_left_time(result)
148
+ if @parse_method == :doc
149
+ end_date = result.css("div[data-end_time]").first["data-end_time"]
150
+ last_seconds = Time.parse(end_date) - Time.now
151
+ else
152
+ last_seconds = result["deadline"].to_i - Time.now.to_i
153
+ end
154
+ last_day = last_seconds.to_i / 86400
155
+ if last_day <= 0
156
+ "已結束"
157
+ else
158
+ last_day.to_s + "天"
159
+ end
160
+ end
161
+
162
+ def get_status(button_text)
163
+ case button_text
164
+ when /贊助/
165
+ "online"
166
+ when /喜歡/
167
+ "voting"
168
+ when /結束|成功/
169
+ "finished"
170
+ else
171
+ "online"
172
+ end
173
+ end
174
+
175
+ def get_fb_count(result)
176
+
177
+ end
178
+
179
+ def get_following_count(result)
180
+
181
+ end
182
+
183
+ def get_backer_list(project_url)
184
+ []
185
+ end
186
+
187
+ def get_currency_string(result)
188
+ if @parse_method == :doc
189
+ result.css("data[data-currency]")[0]["data-currency"]
190
+ else
191
+ result["currency"]
192
+ end
193
+ end
194
+ end
195
+ end
196
+ end