socialcrawler 0.0.3 → 0.0.4

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0f04315836dd27d5ceaf9cc4d34490eb92c40bfc
4
- data.tar.gz: e1277912203a7ae9b52a4078e73ba48346cd5e0f
3
+ metadata.gz: 479202e48815c59eb775fc309eb5f771b27c7c16
4
+ data.tar.gz: b085817b3960a9397e0379561ecb27e81ea1fcd6
5
5
  SHA512:
6
- metadata.gz: 5c9b1577dc309c82657e6a083fc5fb529b14cea34719c5e7c70ad1e86a9dc5a83ae76ec72ca8c236eb0aec5e97ea2c06925fc7a31e5eb74ebaea9f52a9499a4f
7
- data.tar.gz: c5904e1b4a6fdc076f277cb504eab15e3d64f65f67ebb4caac730aff4f233ddfcce8daafbb0d8e3b8450bd1fbe5534543c757545f9c1adc841292715c9a4836a
6
+ metadata.gz: 38f539ee81079a7000ca42b6074caf4269db86a41992952d40b243d3f64970fd5319acad8aea8f488b113ff199438a986d8c51a407bc293aa92c8d184f2718a5
7
+ data.tar.gz: 93806c94207115dbe3ea49f0f6192eb8e9ad36fa853a13c2ff7fbfd247b5e9247fed6a975f1eb53ef3130c637686c0dd4eec7518e50c51f99cbfc595f267af99
@@ -1,9 +1,8 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
- - "1.9.3"
5
- - jruby-19mode
6
- - rbx
4
+ - "2.3.1"
5
+ - "2.4.0"
7
6
 
8
7
  addons:
9
8
  climate_control:
data/Gemfile CHANGED
@@ -7,3 +7,4 @@ gem 'nokogiri', '1.6.4.1'
7
7
  # test coverage
8
8
  gem 'coveralls', require: false, group: :test
9
9
  gem "codeclimate-test-reporter", group: :test, require: nil
10
+ gem 'test-unit'
@@ -28,7 +28,12 @@ module SocialCrawler
28
28
  @map = {
29
29
  twitter: 'twitter.com/',
30
30
  facebook: 'facebook.com/',
31
- google_plus: 'plus.google.com/'
31
+ google_plus: 'plus.google.com/',
32
+ instagram: 'www.instagram.com',
33
+ you_tube: 'youtube.com/user',
34
+ pinterest: 'pinterest.com/',
35
+ linked_in: 'linkedin.com/',
36
+ flickr: 'flickr.com/'
32
37
  }
33
38
  end
34
39
 
@@ -57,7 +62,7 @@ module SocialCrawler
57
62
  def crawl_url(url, log=nil)
58
63
  log = Logger.new(STDOUT) if log.nil?
59
64
  log.info("Crawling #{url}")
60
- result = Hash.new('NOT FOUND')
65
+ result = Hash.new(:NOT_FOUND)
61
66
  begin
62
67
  page = Nokogiri::HTML(open(url))
63
68
  title = page.css('title')
@@ -81,13 +86,7 @@ module SocialCrawler
81
86
  if not status_filename.nil? and File.exists?(status_filename)
82
87
  log.info("Loading previous status from #{status_filename}")
83
88
  CSV.foreach(status_filename) do |row|
84
- if row.count >= 3
85
- status[row[0]] = {
86
- :url => row[0],
87
- :result => row[1],
88
- :message => row[2]
89
- }
90
- end
89
+ set_status_cache_data(status, row)
91
90
  end
92
91
  log.info("Loading previous status from #{status_filename} finished, #{status.keys.length} loaded.")
93
92
  end
@@ -101,16 +100,7 @@ module SocialCrawler
101
100
  return data
102
101
  end
103
102
  CSV.foreach(output_list_filename) do |row|
104
- log.info("Loading #{row} #{row.count}")
105
- if row.count >= 5
106
- data[row[0]] = {
107
- :url => row[0],
108
- :title => row[1],
109
- :twitter => row[2],
110
- :facebook => row[3],
111
- :google_plus => row[4]
112
- }
113
- end
103
+ set_output_cache_data(data, row)
114
104
  log.info("Loading previous status from #{output_list_filename} finished, #{data.keys.length} loaded.")
115
105
  end
116
106
  return data
@@ -125,13 +115,9 @@ module SocialCrawler
125
115
  data = load_output_cache(output_list_filename, log)
126
116
 
127
117
  CSV.open(output_list_filename, "wb") do |output|
128
- data.each do |k, v|
129
- output << [k, v[:title], v[:twitter], v[:facebook], v[:google_plus]]
130
- end
118
+ write_data(data, output)
131
119
  CSV.open(status_filename, "wb") do |status_line|
132
- status.each do |k, v|
133
- status_line << [k, v[:success], v[:message]]
134
- end
120
+ write_status(status, status_line)
135
121
  crawl_loop(data, domain_list_filename, log, output, status, status_line)
136
122
  end
137
123
  end
@@ -151,6 +137,18 @@ module SocialCrawler
151
137
 
152
138
  private
153
139
 
140
+ def write_data(data, output)
141
+ data.each do |k, v|
142
+ output << [k, v[:title], v[:twitter], v[:facebook], v[:google_plus]]
143
+ end
144
+ end
145
+
146
+ def write_status(status, status_line)
147
+ status.each do |k, v|
148
+ status_line << [k, v[:success], v[:message]]
149
+ end
150
+ end
151
+
154
152
  def set_data(result, url, data, output)
155
153
  if result[:success] == true
156
154
  data[url] = result
@@ -166,11 +164,33 @@ module SocialCrawler
166
164
  }
167
165
  status_line << [url, result[:success], result[:message]]
168
166
  end
167
+
168
+ def set_output_cache_data(data, row)
169
+ if row.count >= 5
170
+ data[row[0]] = {
171
+ :url => row[0],
172
+ :title => row[1],
173
+ :twitter => row[2],
174
+ :facebook => row[3],
175
+ :google_plus => row[4]
176
+ }
177
+ end
178
+ end
179
+
180
+ def set_status_cache_data(status, row)
181
+ if row.count >= 3
182
+ status[row[0]] = {
183
+ :url => row[0],
184
+ :result => row[1],
185
+ :message => row[2]
186
+ }
187
+ end
188
+ end
169
189
  end
170
190
  end
171
191
 
172
192
  if __FILE__ == $0
173
- # :nocov:
193
+ #:nocov:
174
194
  SocialCrawler::SocialCrawler.new.crawl(ARGV[0], ARGV[1], ARGV[2])
175
- # :nocov:
195
+ #:nocov:
176
196
  end
@@ -1,3 +1,3 @@
1
1
  module SocialCrawler
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -1,11 +1,9 @@
1
- require "codeclimate-test-reporter"
2
1
  require 'simplecov'
3
2
  require 'coveralls'
4
3
  SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
5
4
  SimpleCov::Formatter::HTMLFormatter,
6
5
  Coveralls::SimpleCov::Formatter
7
6
  ]
8
- CodeClimate::TestReporter.start
9
7
  SimpleCov.start
10
8
  puts "Simple Coverage Started"
11
9
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: socialcrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ivica Ceraj
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-26 00:00:00.000000000 Z
11
+ date: 2017-02-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -101,7 +101,6 @@ files:
101
101
  - test/test_crawler.rb
102
102
  - test/test_helper.rb
103
103
  - test/test_url.txt
104
- - test_status.txt
105
104
  homepage: http://github.com/iceraj/socialcrawler
106
105
  licenses:
107
106
  - LGPL 2.1
@@ -122,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
121
  version: '0'
123
122
  requirements: []
124
123
  rubyforge_project:
125
- rubygems_version: 2.4.5
124
+ rubygems_version: 2.5.1
126
125
  signing_key:
127
126
  specification_version: 4
128
127
  summary: SocialCrawler looks for social media links for different sites
@@ -1,4 +0,0 @@
1
- https://twitter.com/bugaco,,""
2
- https://plus.google.com/101033631762132540828/posts,,""
3
- https://www.facebook.com/,,""
4
- https://localhost/,,"Connection refused - connect(2) for ""localhost"" port 443"