socialcrawler 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/socialcrawler/version.rb +1 -1
- data/lib/socialcrawler.rb +36 -36
- data/test/test_crawler.rb +3 -0
- data/test_status.txt +4 -0
- metadata +21 -32
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0f04315836dd27d5ceaf9cc4d34490eb92c40bfc
|
4
|
+
data.tar.gz: e1277912203a7ae9b52a4078e73ba48346cd5e0f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5c9b1577dc309c82657e6a083fc5fb529b14cea34719c5e7c70ad1e86a9dc5a83ae76ec72ca8c236eb0aec5e97ea2c06925fc7a31e5eb74ebaea9f52a9499a4f
|
7
|
+
data.tar.gz: c5904e1b4a6fdc076f277cb504eab15e3d64f65f67ebb4caac730aff4f233ddfcce8daafbb0d8e3b8450bd1fbe5534543c757545f9c1adc841292715c9a4836a
|
data/lib/socialcrawler.rb
CHANGED
@@ -81,17 +81,13 @@ module SocialCrawler
|
|
81
81
|
if not status_filename.nil? and File.exists?(status_filename)
|
82
82
|
log.info("Loading previous status from #{status_filename}")
|
83
83
|
CSV.foreach(status_filename) do |row|
|
84
|
-
if row.count
|
85
|
-
|
84
|
+
if row.count >= 3
|
85
|
+
status[row[0]] = {
|
86
|
+
:url => row[0],
|
87
|
+
:result => row[1],
|
88
|
+
:message => row[2]
|
89
|
+
}
|
86
90
|
end
|
87
|
-
url = row[0]
|
88
|
-
result = row[1]
|
89
|
-
message = row[2]
|
90
|
-
status[url] = {
|
91
|
-
:url => url,
|
92
|
-
:result => result,
|
93
|
-
:message => message
|
94
|
-
}
|
95
91
|
end
|
96
92
|
log.info("Loading previous status from #{status_filename} finished, #{status.keys.length} loaded.")
|
97
93
|
end
|
@@ -106,21 +102,15 @@ module SocialCrawler
|
|
106
102
|
end
|
107
103
|
CSV.foreach(output_list_filename) do |row|
|
108
104
|
log.info("Loading #{row} #{row.count}")
|
109
|
-
if row.count
|
110
|
-
|
105
|
+
if row.count >= 5
|
106
|
+
data[row[0]] = {
|
107
|
+
:url => row[0],
|
108
|
+
:title => row[1],
|
109
|
+
:twitter => row[2],
|
110
|
+
:facebook => row[3],
|
111
|
+
:google_plus => row[4]
|
112
|
+
}
|
111
113
|
end
|
112
|
-
url = row[0]
|
113
|
-
title= row[1]
|
114
|
-
twitter = row[2]
|
115
|
-
facebook = row[3]
|
116
|
-
google_plus = row[4]
|
117
|
-
data[url] = {
|
118
|
-
:url => url,
|
119
|
-
:title => title,
|
120
|
-
:twitter => twitter,
|
121
|
-
:facebook => facebook,
|
122
|
-
:google_plus => google_plus
|
123
|
-
}
|
124
114
|
log.info("Loading previous status from #{output_list_filename} finished, #{data.keys.length} loaded.")
|
125
115
|
end
|
126
116
|
return data
|
@@ -154,23 +144,33 @@ module SocialCrawler
|
|
154
144
|
next
|
155
145
|
end
|
156
146
|
result = crawl_url(url, log)
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
147
|
+
set_data(result, url, data, output)
|
148
|
+
set_status(result, url, status, status_line)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
def set_data(result, url, data, output)
|
155
|
+
if result[:success] == true
|
156
|
+
data[url] = result
|
157
|
+
output << [url, result[:title], result[:twitter], result[:facebook], result[:google_plus]]
|
167
158
|
end
|
168
159
|
end
|
160
|
+
|
161
|
+
def set_status(result, url, status, status_line)
|
162
|
+
status[url] = {
|
163
|
+
:url => url,
|
164
|
+
:result => result[:success],
|
165
|
+
:message => result[:message]
|
166
|
+
}
|
167
|
+
status_line << [url, result[:success], result[:message]]
|
168
|
+
end
|
169
169
|
end
|
170
170
|
end
|
171
171
|
|
172
172
|
if __FILE__ == $0
|
173
|
-
|
173
|
+
# :nocov:
|
174
174
|
SocialCrawler::SocialCrawler.new.crawl(ARGV[0], ARGV[1], ARGV[2])
|
175
|
-
|
175
|
+
# :nocov:
|
176
176
|
end
|
data/test/test_crawler.rb
CHANGED
@@ -20,6 +20,9 @@ class CrawlewrTest < Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
sc = SocialCrawler::SocialCrawler.new
|
22
22
|
sc.crawl('test/test_url.txt', '/tmp/test_out.txt', '/tmp/test_status.txt')
|
23
|
+
|
24
|
+
sc = SocialCrawler::SocialCrawler.new
|
25
|
+
sc.crawl('test/test_url.txt', '/tmp/test_out.txt', 'test_status.txt')
|
23
26
|
end
|
24
27
|
|
25
28
|
end
|
data/test_status.txt
ADDED
metadata
CHANGED
@@ -1,94 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: socialcrawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Ivica Ceraj
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-26 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bundler
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.7'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- - ~>
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '1.7'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rake
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- - ~>
|
31
|
+
- - "~>"
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '10.0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- - ~>
|
38
|
+
- - "~>"
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '10.0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: semantic
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- - ~>
|
45
|
+
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '1.0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- - ~>
|
52
|
+
- - "~>"
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '1.0'
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: simplecov
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
|
-
- - ~>
|
59
|
+
- - "~>"
|
68
60
|
- !ruby/object:Gem::Version
|
69
61
|
version: '0.9'
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
|
-
- - ~>
|
66
|
+
- - "~>"
|
76
67
|
- !ruby/object:Gem::Version
|
77
68
|
version: '0.9'
|
78
69
|
- !ruby/object:Gem::Dependency
|
79
70
|
name: simplecov-html
|
80
71
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
72
|
requirements:
|
83
|
-
- - ~>
|
73
|
+
- - "~>"
|
84
74
|
- !ruby/object:Gem::Version
|
85
75
|
version: '0.8'
|
86
76
|
type: :development
|
87
77
|
prerelease: false
|
88
78
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
79
|
requirements:
|
91
|
-
- - ~>
|
80
|
+
- - "~>"
|
92
81
|
- !ruby/object:Gem::Version
|
93
82
|
version: '0.8'
|
94
83
|
description: It read file containing list of urls and produces output file with domain,
|
@@ -99,9 +88,9 @@ executables: []
|
|
99
88
|
extensions: []
|
100
89
|
extra_rdoc_files: []
|
101
90
|
files:
|
102
|
-
- .coveralls.yml
|
103
|
-
- .gitignore
|
104
|
-
- .travis.yml
|
91
|
+
- ".coveralls.yml"
|
92
|
+
- ".gitignore"
|
93
|
+
- ".travis.yml"
|
105
94
|
- Gemfile
|
106
95
|
- LICENSE.txt
|
107
96
|
- README.md
|
@@ -112,30 +101,30 @@ files:
|
|
112
101
|
- test/test_crawler.rb
|
113
102
|
- test/test_helper.rb
|
114
103
|
- test/test_url.txt
|
104
|
+
- test_status.txt
|
115
105
|
homepage: http://github.com/iceraj/socialcrawler
|
116
106
|
licenses:
|
117
107
|
- LGPL 2.1
|
108
|
+
metadata: {}
|
118
109
|
post_install_message:
|
119
110
|
rdoc_options: []
|
120
111
|
require_paths:
|
121
112
|
- lib
|
122
113
|
required_ruby_version: !ruby/object:Gem::Requirement
|
123
|
-
none: false
|
124
114
|
requirements:
|
125
|
-
- -
|
115
|
+
- - ">="
|
126
116
|
- !ruby/object:Gem::Version
|
127
117
|
version: '0'
|
128
118
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
119
|
requirements:
|
131
|
-
- -
|
120
|
+
- - ">="
|
132
121
|
- !ruby/object:Gem::Version
|
133
122
|
version: '0'
|
134
123
|
requirements: []
|
135
124
|
rubyforge_project:
|
136
|
-
rubygems_version:
|
125
|
+
rubygems_version: 2.4.5
|
137
126
|
signing_key:
|
138
|
-
specification_version:
|
127
|
+
specification_version: 4
|
139
128
|
summary: SocialCrawler looks for social media links for different sites
|
140
129
|
test_files:
|
141
130
|
- test/test_crawler.rb
|