socialcrawler 0.0.3 → 0.0.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 0f04315836dd27d5ceaf9cc4d34490eb92c40bfc
-  data.tar.gz: e1277912203a7ae9b52a4078e73ba48346cd5e0f
+  metadata.gz: 479202e48815c59eb775fc309eb5f771b27c7c16
+  data.tar.gz: b085817b3960a9397e0379561ecb27e81ea1fcd6
 SHA512:
-  metadata.gz: 5c9b1577dc309c82657e6a083fc5fb529b14cea34719c5e7c70ad1e86a9dc5a83ae76ec72ca8c236eb0aec5e97ea2c06925fc7a31e5eb74ebaea9f52a9499a4f
-  data.tar.gz: c5904e1b4a6fdc076f277cb504eab15e3d64f65f67ebb4caac730aff4f233ddfcce8daafbb0d8e3b8450bd1fbe5534543c757545f9c1adc841292715c9a4836a
+  metadata.gz: 38f539ee81079a7000ca42b6074caf4269db86a41992952d40b243d3f64970fd5319acad8aea8f488b113ff199438a986d8c51a407bc293aa92c8d184f2718a5
+  data.tar.gz: 93806c94207115dbe3ea49f0f6192eb8e9ad36fa853a13c2ff7fbfd247b5e9247fed6a975f1eb53ef3130c637686c0dd4eec7518e50c51f99cbfc595f267af99
data/.travis.yml CHANGED
@@ -1,9 +1,8 @@
 language: ruby
 
 rvm:
-  - "1.9.3"
-  - jruby-19mode
-  - rbx
+  - "2.3.1"
+  - "2.4.0"
 
 addons:
   climate_control:
data/Gemfile CHANGED
@@ -7,3 +7,4 @@ gem 'nokogiri', '1.6.4.1'
 # test coverage
 gem 'coveralls', require: false, group: :test
 gem "codeclimate-test-reporter", group: :test, require: nil
+gem 'test-unit'
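
The new gem 'test-unit' line keeps the Test::Unit-based suite working on the newer Rubies now targeted in CI, where test-unit ships as a bundled gem rather than a default one, so it must be declared in the Gemfile for it to be available under Bundler. A minimal sketch of the kind of test this dependency supports (the class name and assertion are illustrative, not taken from test/test_crawler.rb):

    # Illustrative Test::Unit test; names are hypothetical, not from the gem's suite.
    require 'test/unit'
    require 'socialcrawler'

    class SocialCrawlerSmokeTest < Test::Unit::TestCase
      def test_crawler_can_be_instantiated
        assert_not_nil SocialCrawler::SocialCrawler.new
      end
    end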
data/lib/socialcrawler.rb CHANGED
@@ -28,7 +28,12 @@ module SocialCrawler
       @map = {
           twitter: 'twitter.com/',
           facebook: 'facebook.com/',
-          google_plus: 'plus.google.com/'
+          google_plus: 'plus.google.com/',
+          instagram: 'www.instagram.com',
+          you_tube: 'youtube.com/user',
+          pinterest: 'pinterest.com/',
+          linked_in: 'linkedin.com/',
+          flickr: 'flickr.com/'
       }
     end
 
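
The @map hash pairs a result key with a domain fragment the crawler looks for in a page's links; 0.0.4 extends it from Twitter, Facebook, and Google+ to Instagram, YouTube, Pinterest, LinkedIn, and Flickr. A hedged sketch of how such a map can be matched against a page's anchors (the URL and the matching loop below are assumptions for illustration, not the gem's actual crawl code, which this diff does not show in full):

    # Sketch only: match each anchor's href against the domain fragments in a map like @map.
    require 'nokogiri'
    require 'open-uri'

    map = { twitter: 'twitter.com/', linked_in: 'linkedin.com/', flickr: 'flickr.com/' }
    found = Hash.new(:NOT_FOUND)

    page = Nokogiri::HTML(open('https://example.com/'))
    page.css('a').each do |a|
      href = a['href'].to_s
      map.each { |key, fragment| found[key] = href if href.include?(fragment) }
    end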
@@ -57,7 +62,7 @@ module SocialCrawler
     def crawl_url(url, log=nil)
       log = Logger.new(STDOUT) if log.nil?
       log.info("Crawling #{url}")
-      result = Hash.new('NOT FOUND')
+      result = Hash.new(:NOT_FOUND)
       begin
         page = Nokogiri::HTML(open(url))
         title = page.css('title')
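
Switching the result hash's default from the string 'NOT FOUND' to the symbol :NOT_FOUND means any network key that never gets assigned now reads back as a symbol, so callers that compared against the old string would need updating. A small sketch of the changed behaviour:

    result = Hash.new(:NOT_FOUND)
    result[:twitter] = 'https://twitter.com/bugaco'
    result[:twitter]    # => "https://twitter.com/bugaco"
    result[:pinterest]  # => :NOT_FOUND  (was the string 'NOT FOUND' in 0.0.3)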
@@ -81,13 +86,7 @@ module SocialCrawler
       if not status_filename.nil? and File.exists?(status_filename)
         log.info("Loading previous status from #{status_filename}")
         CSV.foreach(status_filename) do |row|
-          if row.count >= 3
-            status[row[0]] = {
-                :url => row[0],
-                :result => row[1],
-                :message => row[2]
-            }
-          end
+          set_status_cache_data(status, row)
         end
         log.info("Loading previous status from #{status_filename} finished, #{status.keys.length} loaded.")
       end
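
The inline row parsing moves into set_status_cache_data, defined later in this diff; behaviour is unchanged. Each status CSV row is expected to hold at least a url, result, and message, roughly like this (the row values here are invented for illustration):

    status = {}
    row = ['https://localhost/', 'false', 'Connection refused']
    if row.count >= 3
      status[row[0]] = { :url => row[0], :result => row[1], :message => row[2] }
    end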
@@ -101,16 +100,7 @@ module SocialCrawler
         return data
       end
       CSV.foreach(output_list_filename) do |row|
-        log.info("Loading #{row} #{row.count}")
-        if row.count >= 5
-          data[row[0]] = {
-              :url => row[0],
-              :title => row[1],
-              :twitter => row[2],
-              :facebook => row[3],
-              :google_plus => row[4]
-          }
-        end
+        set_output_cache_data(data, row)
         log.info("Loading previous status from #{output_list_filename} finished, #{data.keys.length} loaded.")
       end
       return data
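
Likewise, output-cache rows now go through set_output_cache_data (also defined later in this diff); the noisy per-row log.info call is dropped in the process. Each cached row carries url, title, twitter, facebook, and google_plus, for example (values invented for illustration):

    data = {}
    row = ['https://example.com/', 'Example Title', 'https://twitter.com/example', '', '']
    if row.count >= 5
      data[row[0]] = { :url => row[0], :title => row[1], :twitter => row[2],
                       :facebook => row[3], :google_plus => row[4] }
    end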
@@ -125,13 +115,9 @@ module SocialCrawler
       data = load_output_cache(output_list_filename, log)
 
       CSV.open(output_list_filename, "wb") do |output|
-        data.each do |k, v|
-          output << [k, v[:title], v[:twitter], v[:facebook], v[:google_plus]]
-        end
+        write_data(data, output)
         CSV.open(status_filename, "wb") do |status_line|
-          status.each do |k, v|
-            status_line << [k, v[:success], v[:message]]
-          end
+          write_status(status, status_line)
           crawl_loop(data, domain_list_filename, log, output, status, status_line)
         end
       end
@@ -151,6 +137,18 @@ module SocialCrawler
 
     private
 
+    def write_data(data, output)
+      data.each do |k, v|
+        output << [k, v[:title], v[:twitter], v[:facebook], v[:google_plus]]
+      end
+    end
+
+    def write_status(status, status_line)
+      status.each do |k, v|
+        status_line << [k, v[:success], v[:message]]
+      end
+    end
+
     def set_data(result, url, data, output)
       if result[:success] == true
         data[url] = result
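
The two extracted writers keep the same column order as 0.0.3, so write_data emits exactly the five columns that set_output_cache_data reads back on a later run. (One quirk carried over from 0.0.3: write_status stores v[:success] in the second column, while set_status_cache_data loads that column back under :result.) A hedged round-trip sketch, with the file name and values invented for illustration:

    require 'csv'

    data = { 'https://example.com/' => { :title => 'Example Title',
                                          :twitter => 'https://twitter.com/example',
                                          :facebook => :NOT_FOUND,
                                          :google_plus => :NOT_FOUND } }
    CSV.open('output.csv', 'wb') do |output|
      data.each { |k, v| output << [k, v[:title], v[:twitter], v[:facebook], v[:google_plus]] }
    end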
@@ -166,11 +164,33 @@ module SocialCrawler
       }
       status_line << [url, result[:success], result[:message]]
     end
+
+    def set_output_cache_data(data, row)
+      if row.count >= 5
+        data[row[0]] = {
+            :url => row[0],
+            :title => row[1],
+            :twitter => row[2],
+            :facebook => row[3],
+            :google_plus => row[4]
+        }
+      end
+    end
+
+    def set_status_cache_data(status, row)
+      if row.count >= 3
+        status[row[0]] = {
+            :url => row[0],
+            :result => row[1],
+            :message => row[2]
+        }
+      end
+    end
   end
 end
 
 if __FILE__ == $0
-  # :nocov:
+  #:nocov:
   SocialCrawler::SocialCrawler.new.crawl(ARGV[0], ARGV[1], ARGV[2])
-  # :nocov:
+  #:nocov:
 end
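
The __FILE__ == $0 guard, still excluded from coverage by the :nocov: markers, lets the file be run directly from the command line. A usage sketch, assuming the three arguments are the domain list, output list, and status file, with placeholder file names and an assumed script path:

    # ruby lib/socialcrawler.rb domains.csv output.csv status.csv
    require 'socialcrawler'
    SocialCrawler::SocialCrawler.new.crawl('domains.csv', 'output.csv', 'status.csv')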
data/lib/socialcrawler/version.rb CHANGED
@@ -1,3 +1,3 @@
 module SocialCrawler
-  VERSION = "0.0.3"
+  VERSION = "0.0.4"
 end
data/test/test_helper.rb CHANGED
@@ -1,11 +1,9 @@
-require "codeclimate-test-reporter"
 require 'simplecov'
 require 'coveralls'
 SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
     SimpleCov::Formatter::HTMLFormatter,
     Coveralls::SimpleCov::Formatter
 ]
-CodeClimate::TestReporter.start
 SimpleCov.start
 puts "Simple Coverage Started"
 
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: socialcrawler
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - Ivica Ceraj
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-01-26 00:00:00.000000000 Z
+date: 2017-02-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -101,7 +101,6 @@ files:
 - test/test_crawler.rb
 - test/test_helper.rb
 - test/test_url.txt
-- test_status.txt
 homepage: http://github.com/iceraj/socialcrawler
 licenses:
 - LGPL 2.1
@@ -122,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.5
+rubygems_version: 2.5.1
 signing_key:
 specification_version: 4
 summary: SocialCrawler looks for social media links for different sites
data/test_status.txt DELETED
@@ -1,4 +0,0 @@
-https://twitter.com/bugaco,,""
-https://plus.google.com/101033631762132540828/posts,,""
-https://www.facebook.com/,,""
-https://localhost/,,"Connection refused - connect(2) for ""localhost"" port 443"