socialcrawler 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0f04315836dd27d5ceaf9cc4d34490eb92c40bfc
4
+ data.tar.gz: e1277912203a7ae9b52a4078e73ba48346cd5e0f
5
+ SHA512:
6
+ metadata.gz: 5c9b1577dc309c82657e6a083fc5fb529b14cea34719c5e7c70ad1e86a9dc5a83ae76ec72ca8c236eb0aec5e97ea2c06925fc7a31e5eb74ebaea9f52a9499a4f
7
+ data.tar.gz: c5904e1b4a6fdc076f277cb504eab15e3d64f65f67ebb4caac730aff4f233ddfcce8daafbb0d8e3b8450bd1fbe5534543c757545f9c1adc841292715c9a4836a
@@ -1,3 +1,3 @@
1
1
  module SocialCrawler
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/socialcrawler.rb CHANGED
@@ -81,17 +81,13 @@ module SocialCrawler
81
81
  if not status_filename.nil? and File.exists?(status_filename)
82
82
  log.info("Loading previous status from #{status_filename}")
83
83
  CSV.foreach(status_filename) do |row|
84
- if row.count < 3
85
- next
84
+ if row.count >= 3
85
+ status[row[0]] = {
86
+ :url => row[0],
87
+ :result => row[1],
88
+ :message => row[2]
89
+ }
86
90
  end
87
- url = row[0]
88
- result = row[1]
89
- message = row[2]
90
- status[url] = {
91
- :url => url,
92
- :result => result,
93
- :message => message
94
- }
95
91
  end
96
92
  log.info("Loading previous status from #{status_filename} finished, #{status.keys.length} loaded.")
97
93
  end
@@ -106,21 +102,15 @@ module SocialCrawler
106
102
  end
107
103
  CSV.foreach(output_list_filename) do |row|
108
104
  log.info("Loading #{row} #{row.count}")
109
- if row.count < 5
110
- next
105
+ if row.count >= 5
106
+ data[row[0]] = {
107
+ :url => row[0],
108
+ :title => row[1],
109
+ :twitter => row[2],
110
+ :facebook => row[3],
111
+ :google_plus => row[4]
112
+ }
111
113
  end
112
- url = row[0]
113
- title= row[1]
114
- twitter = row[2]
115
- facebook = row[3]
116
- google_plus = row[4]
117
- data[url] = {
118
- :url => url,
119
- :title => title,
120
- :twitter => twitter,
121
- :facebook => facebook,
122
- :google_plus => google_plus
123
- }
124
114
  log.info("Loading previous status from #{output_list_filename} finished, #{data.keys.length} loaded.")
125
115
  end
126
116
  return data
@@ -154,23 +144,33 @@ module SocialCrawler
154
144
  next
155
145
  end
156
146
  result = crawl_url(url, log)
157
- if result[:success] == true
158
- data[url] = result
159
- output << [url, result[:title], result[:twitter], result[:facebook], result[:google_plus]]
160
- end
161
- status[url] = {
162
- :url => url,
163
- :result => result[:success],
164
- :message => result[:message]
165
- }
166
- status_line << [url, result[:success], result[:message]]
147
+ set_data(result, url, data, output)
148
+ set_status(result, url, status, status_line)
149
+ end
150
+ end
151
+
152
+ private
153
+
154
+ def set_data(result, url, data, output)
155
+ if result[:success] == true
156
+ data[url] = result
157
+ output << [url, result[:title], result[:twitter], result[:facebook], result[:google_plus]]
167
158
  end
168
159
  end
160
+
161
+ def set_status(result, url, status, status_line)
162
+ status[url] = {
163
+ :url => url,
164
+ :result => result[:success],
165
+ :message => result[:message]
166
+ }
167
+ status_line << [url, result[:success], result[:message]]
168
+ end
169
169
  end
170
170
  end
171
171
 
172
172
  if __FILE__ == $0
173
- #:nocov:
173
+ # :nocov:
174
174
  SocialCrawler::SocialCrawler.new.crawl(ARGV[0], ARGV[1], ARGV[2])
175
- #:nocov:
175
+ # :nocov:
176
176
  end
data/test/test_crawler.rb CHANGED
@@ -20,6 +20,9 @@ class CrawlewrTest < Test::Unit::TestCase
20
20
 
21
21
  sc = SocialCrawler::SocialCrawler.new
22
22
  sc.crawl('test/test_url.txt', '/tmp/test_out.txt', '/tmp/test_status.txt')
23
+
24
+ sc = SocialCrawler::SocialCrawler.new
25
+ sc.crawl('test/test_url.txt', '/tmp/test_out.txt', 'test_status.txt')
23
26
  end
24
27
 
25
28
  end
data/test_status.txt ADDED
@@ -0,0 +1,4 @@
1
+ https://twitter.com/bugaco,,""
2
+ https://plus.google.com/101033631762132540828/posts,,""
3
+ https://www.facebook.com/,,""
4
+ https://localhost/,,"Connection refused - connect(2) for ""localhost"" port 443"
metadata CHANGED
@@ -1,94 +1,83 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: socialcrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
5
- prerelease:
4
+ version: 0.0.3
6
5
  platform: ruby
7
6
  authors:
8
7
  - Ivica Ceraj
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2015-01-16 00:00:00.000000000 Z
11
+ date: 2015-01-26 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: bundler
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
19
  version: '1.7'
22
20
  type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ~>
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
26
  version: '1.7'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: rake
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ~>
31
+ - - "~>"
36
32
  - !ruby/object:Gem::Version
37
33
  version: '10.0'
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ~>
38
+ - - "~>"
44
39
  - !ruby/object:Gem::Version
45
40
  version: '10.0'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: semantic
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ~>
45
+ - - "~>"
52
46
  - !ruby/object:Gem::Version
53
47
  version: '1.0'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ~>
52
+ - - "~>"
60
53
  - !ruby/object:Gem::Version
61
54
  version: '1.0'
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: simplecov
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ~>
59
+ - - "~>"
68
60
  - !ruby/object:Gem::Version
69
61
  version: '0.9'
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ~>
66
+ - - "~>"
76
67
  - !ruby/object:Gem::Version
77
68
  version: '0.9'
78
69
  - !ruby/object:Gem::Dependency
79
70
  name: simplecov-html
80
71
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
72
  requirements:
83
- - - ~>
73
+ - - "~>"
84
74
  - !ruby/object:Gem::Version
85
75
  version: '0.8'
86
76
  type: :development
87
77
  prerelease: false
88
78
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
79
  requirements:
91
- - - ~>
80
+ - - "~>"
92
81
  - !ruby/object:Gem::Version
93
82
  version: '0.8'
94
83
  description: It read file containing list of urls and produces output file with domain,
@@ -99,9 +88,9 @@ executables: []
99
88
  extensions: []
100
89
  extra_rdoc_files: []
101
90
  files:
102
- - .coveralls.yml
103
- - .gitignore
104
- - .travis.yml
91
+ - ".coveralls.yml"
92
+ - ".gitignore"
93
+ - ".travis.yml"
105
94
  - Gemfile
106
95
  - LICENSE.txt
107
96
  - README.md
@@ -112,30 +101,30 @@ files:
112
101
  - test/test_crawler.rb
113
102
  - test/test_helper.rb
114
103
  - test/test_url.txt
104
+ - test_status.txt
115
105
  homepage: http://github.com/iceraj/socialcrawler
116
106
  licenses:
117
107
  - LGPL 2.1
108
+ metadata: {}
118
109
  post_install_message:
119
110
  rdoc_options: []
120
111
  require_paths:
121
112
  - lib
122
113
  required_ruby_version: !ruby/object:Gem::Requirement
123
- none: false
124
114
  requirements:
125
- - - ! '>='
115
+ - - ">="
126
116
  - !ruby/object:Gem::Version
127
117
  version: '0'
128
118
  required_rubygems_version: !ruby/object:Gem::Requirement
129
- none: false
130
119
  requirements:
131
- - - ! '>='
120
+ - - ">="
132
121
  - !ruby/object:Gem::Version
133
122
  version: '0'
134
123
  requirements: []
135
124
  rubyforge_project:
136
- rubygems_version: 1.8.24
125
+ rubygems_version: 2.4.5
137
126
  signing_key:
138
- specification_version: 3
127
+ specification_version: 4
139
128
  summary: SocialCrawler looks for social media links for different sites
140
129
  test_files:
141
130
  - test/test_crawler.rb