socialcrawler 0.0.2 → 0.0.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0f04315836dd27d5ceaf9cc4d34490eb92c40bfc
4
+ data.tar.gz: e1277912203a7ae9b52a4078e73ba48346cd5e0f
5
+ SHA512:
6
+ metadata.gz: 5c9b1577dc309c82657e6a083fc5fb529b14cea34719c5e7c70ad1e86a9dc5a83ae76ec72ca8c236eb0aec5e97ea2c06925fc7a31e5eb74ebaea9f52a9499a4f
7
+ data.tar.gz: c5904e1b4a6fdc076f277cb504eab15e3d64f65f67ebb4caac730aff4f233ddfcce8daafbb0d8e3b8450bd1fbe5534543c757545f9c1adc841292715c9a4836a
@@ -1,3 +1,3 @@
1
1
  module SocialCrawler
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/socialcrawler.rb CHANGED
@@ -81,17 +81,13 @@ module SocialCrawler
81
81
  if not status_filename.nil? and File.exists?(status_filename)
82
82
  log.info("Loading previous status from #{status_filename}")
83
83
  CSV.foreach(status_filename) do |row|
84
- if row.count < 3
85
- next
84
+ if row.count >= 3
85
+ status[row[0]] = {
86
+ :url => row[0],
87
+ :result => row[1],
88
+ :message => row[2]
89
+ }
86
90
  end
87
- url = row[0]
88
- result = row[1]
89
- message = row[2]
90
- status[url] = {
91
- :url => url,
92
- :result => result,
93
- :message => message
94
- }
95
91
  end
96
92
  log.info("Loading previous status from #{status_filename} finished, #{status.keys.length} loaded.")
97
93
  end
@@ -106,21 +102,15 @@ module SocialCrawler
106
102
  end
107
103
  CSV.foreach(output_list_filename) do |row|
108
104
  log.info("Loading #{row} #{row.count}")
109
- if row.count < 5
110
- next
105
+ if row.count >= 5
106
+ data[row[0]] = {
107
+ :url => row[0],
108
+ :title => row[1],
109
+ :twitter => row[2],
110
+ :facebook => row[3],
111
+ :google_plus => row[4]
112
+ }
111
113
  end
112
- url = row[0]
113
- title= row[1]
114
- twitter = row[2]
115
- facebook = row[3]
116
- google_plus = row[4]
117
- data[url] = {
118
- :url => url,
119
- :title => title,
120
- :twitter => twitter,
121
- :facebook => facebook,
122
- :google_plus => google_plus
123
- }
124
114
  log.info("Loading previous status from #{output_list_filename} finished, #{data.keys.length} loaded.")
125
115
  end
126
116
  return data
@@ -154,23 +144,33 @@ module SocialCrawler
154
144
  next
155
145
  end
156
146
  result = crawl_url(url, log)
157
- if result[:success] == true
158
- data[url] = result
159
- output << [url, result[:title], result[:twitter], result[:facebook], result[:google_plus]]
160
- end
161
- status[url] = {
162
- :url => url,
163
- :result => result[:success],
164
- :message => result[:message]
165
- }
166
- status_line << [url, result[:success], result[:message]]
147
+ set_data(result, url, data, output)
148
+ set_status(result, url, status, status_line)
149
+ end
150
+ end
151
+
152
+ private
153
+
154
+ def set_data(result, url, data, output)
155
+ if result[:success] == true
156
+ data[url] = result
157
+ output << [url, result[:title], result[:twitter], result[:facebook], result[:google_plus]]
167
158
  end
168
159
  end
160
+
161
+ def set_status(result, url, status, status_line)
162
+ status[url] = {
163
+ :url => url,
164
+ :result => result[:success],
165
+ :message => result[:message]
166
+ }
167
+ status_line << [url, result[:success], result[:message]]
168
+ end
169
169
  end
170
170
  end
171
171
 
172
172
  if __FILE__ == $0
173
- #:nocov:
173
+ # :nocov:
174
174
  SocialCrawler::SocialCrawler.new.crawl(ARGV[0], ARGV[1], ARGV[2])
175
- #:nocov:
175
+ # :nocov:
176
176
  end
data/test/test_crawler.rb CHANGED
@@ -20,6 +20,9 @@ class CrawlewrTest < Test::Unit::TestCase
20
20
 
21
21
  sc = SocialCrawler::SocialCrawler.new
22
22
  sc.crawl('test/test_url.txt', '/tmp/test_out.txt', '/tmp/test_status.txt')
23
+
24
+ sc = SocialCrawler::SocialCrawler.new
25
+ sc.crawl('test/test_url.txt', '/tmp/test_out.txt', 'test_status.txt')
23
26
  end
24
27
 
25
28
  end
data/test_status.txt ADDED
@@ -0,0 +1,4 @@
1
+ https://twitter.com/bugaco,,""
2
+ https://plus.google.com/101033631762132540828/posts,,""
3
+ https://www.facebook.com/,,""
4
+ https://localhost/,,"Connection refused - connect(2) for ""localhost"" port 443"
metadata CHANGED
@@ -1,94 +1,83 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: socialcrawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
5
- prerelease:
4
+ version: 0.0.3
6
5
  platform: ruby
7
6
  authors:
8
7
  - Ivica Ceraj
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2015-01-16 00:00:00.000000000 Z
11
+ date: 2015-01-26 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: bundler
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
19
  version: '1.7'
22
20
  type: :development
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ~>
24
+ - - "~>"
28
25
  - !ruby/object:Gem::Version
29
26
  version: '1.7'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: rake
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ~>
31
+ - - "~>"
36
32
  - !ruby/object:Gem::Version
37
33
  version: '10.0'
38
34
  type: :development
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ~>
38
+ - - "~>"
44
39
  - !ruby/object:Gem::Version
45
40
  version: '10.0'
46
41
  - !ruby/object:Gem::Dependency
47
42
  name: semantic
48
43
  requirement: !ruby/object:Gem::Requirement
49
- none: false
50
44
  requirements:
51
- - - ~>
45
+ - - "~>"
52
46
  - !ruby/object:Gem::Version
53
47
  version: '1.0'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
51
  requirements:
59
- - - ~>
52
+ - - "~>"
60
53
  - !ruby/object:Gem::Version
61
54
  version: '1.0'
62
55
  - !ruby/object:Gem::Dependency
63
56
  name: simplecov
64
57
  requirement: !ruby/object:Gem::Requirement
65
- none: false
66
58
  requirements:
67
- - - ~>
59
+ - - "~>"
68
60
  - !ruby/object:Gem::Version
69
61
  version: '0.9'
70
62
  type: :development
71
63
  prerelease: false
72
64
  version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
65
  requirements:
75
- - - ~>
66
+ - - "~>"
76
67
  - !ruby/object:Gem::Version
77
68
  version: '0.9'
78
69
  - !ruby/object:Gem::Dependency
79
70
  name: simplecov-html
80
71
  requirement: !ruby/object:Gem::Requirement
81
- none: false
82
72
  requirements:
83
- - - ~>
73
+ - - "~>"
84
74
  - !ruby/object:Gem::Version
85
75
  version: '0.8'
86
76
  type: :development
87
77
  prerelease: false
88
78
  version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
79
  requirements:
91
- - - ~>
80
+ - - "~>"
92
81
  - !ruby/object:Gem::Version
93
82
  version: '0.8'
94
83
  description: It read file containing list of urls and produces output file with domain,
@@ -99,9 +88,9 @@ executables: []
99
88
  extensions: []
100
89
  extra_rdoc_files: []
101
90
  files:
102
- - .coveralls.yml
103
- - .gitignore
104
- - .travis.yml
91
+ - ".coveralls.yml"
92
+ - ".gitignore"
93
+ - ".travis.yml"
105
94
  - Gemfile
106
95
  - LICENSE.txt
107
96
  - README.md
@@ -112,30 +101,30 @@ files:
112
101
  - test/test_crawler.rb
113
102
  - test/test_helper.rb
114
103
  - test/test_url.txt
104
+ - test_status.txt
115
105
  homepage: http://github.com/iceraj/socialcrawler
116
106
  licenses:
117
107
  - LGPL 2.1
108
+ metadata: {}
118
109
  post_install_message:
119
110
  rdoc_options: []
120
111
  require_paths:
121
112
  - lib
122
113
  required_ruby_version: !ruby/object:Gem::Requirement
123
- none: false
124
114
  requirements:
125
- - - ! '>='
115
+ - - ">="
126
116
  - !ruby/object:Gem::Version
127
117
  version: '0'
128
118
  required_rubygems_version: !ruby/object:Gem::Requirement
129
- none: false
130
119
  requirements:
131
- - - ! '>='
120
+ - - ">="
132
121
  - !ruby/object:Gem::Version
133
122
  version: '0'
134
123
  requirements: []
135
124
  rubyforge_project:
136
- rubygems_version: 1.8.24
125
+ rubygems_version: 2.4.5
137
126
  signing_key:
138
- specification_version: 3
127
+ specification_version: 4
139
128
  summary: SocialCrawler looks for social media links for different sites
140
129
  test_files:
141
130
  - test/test_crawler.rb