socialcrawler 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/socialcrawler/version.rb +1 -1
- data/lib/socialcrawler.rb +36 -36
- data/test/test_crawler.rb +3 -0
- data/test_status.txt +4 -0
- metadata +21 -32
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0f04315836dd27d5ceaf9cc4d34490eb92c40bfc
|
4
|
+
data.tar.gz: e1277912203a7ae9b52a4078e73ba48346cd5e0f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5c9b1577dc309c82657e6a083fc5fb529b14cea34719c5e7c70ad1e86a9dc5a83ae76ec72ca8c236eb0aec5e97ea2c06925fc7a31e5eb74ebaea9f52a9499a4f
|
7
|
+
data.tar.gz: c5904e1b4a6fdc076f277cb504eab15e3d64f65f67ebb4caac730aff4f233ddfcce8daafbb0d8e3b8450bd1fbe5534543c757545f9c1adc841292715c9a4836a
|
data/lib/socialcrawler.rb
CHANGED
@@ -81,17 +81,13 @@ module SocialCrawler
|
|
81
81
|
if not status_filename.nil? and File.exists?(status_filename)
|
82
82
|
log.info("Loading previous status from #{status_filename}")
|
83
83
|
CSV.foreach(status_filename) do |row|
|
84
|
-
if row.count
|
85
|
-
|
84
|
+
if row.count >= 3
|
85
|
+
status[row[0]] = {
|
86
|
+
:url => row[0],
|
87
|
+
:result => row[1],
|
88
|
+
:message => row[2]
|
89
|
+
}
|
86
90
|
end
|
87
|
-
url = row[0]
|
88
|
-
result = row[1]
|
89
|
-
message = row[2]
|
90
|
-
status[url] = {
|
91
|
-
:url => url,
|
92
|
-
:result => result,
|
93
|
-
:message => message
|
94
|
-
}
|
95
91
|
end
|
96
92
|
log.info("Loading previous status from #{status_filename} finished, #{status.keys.length} loaded.")
|
97
93
|
end
|
@@ -106,21 +102,15 @@ module SocialCrawler
|
|
106
102
|
end
|
107
103
|
CSV.foreach(output_list_filename) do |row|
|
108
104
|
log.info("Loading #{row} #{row.count}")
|
109
|
-
if row.count
|
110
|
-
|
105
|
+
if row.count >= 5
|
106
|
+
data[row[0]] = {
|
107
|
+
:url => row[0],
|
108
|
+
:title => row[1],
|
109
|
+
:twitter => row[2],
|
110
|
+
:facebook => row[3],
|
111
|
+
:google_plus => row[4]
|
112
|
+
}
|
111
113
|
end
|
112
|
-
url = row[0]
|
113
|
-
title= row[1]
|
114
|
-
twitter = row[2]
|
115
|
-
facebook = row[3]
|
116
|
-
google_plus = row[4]
|
117
|
-
data[url] = {
|
118
|
-
:url => url,
|
119
|
-
:title => title,
|
120
|
-
:twitter => twitter,
|
121
|
-
:facebook => facebook,
|
122
|
-
:google_plus => google_plus
|
123
|
-
}
|
124
114
|
log.info("Loading previous status from #{output_list_filename} finished, #{data.keys.length} loaded.")
|
125
115
|
end
|
126
116
|
return data
|
@@ -154,23 +144,33 @@ module SocialCrawler
|
|
154
144
|
next
|
155
145
|
end
|
156
146
|
result = crawl_url(url, log)
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
147
|
+
set_data(result, url, data, output)
|
148
|
+
set_status(result, url, status, status_line)
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
def set_data(result, url, data, output)
|
155
|
+
if result[:success] == true
|
156
|
+
data[url] = result
|
157
|
+
output << [url, result[:title], result[:twitter], result[:facebook], result[:google_plus]]
|
167
158
|
end
|
168
159
|
end
|
160
|
+
|
161
|
+
def set_status(result, url, status, status_line)
|
162
|
+
status[url] = {
|
163
|
+
:url => url,
|
164
|
+
:result => result[:success],
|
165
|
+
:message => result[:message]
|
166
|
+
}
|
167
|
+
status_line << [url, result[:success], result[:message]]
|
168
|
+
end
|
169
169
|
end
|
170
170
|
end
|
171
171
|
|
172
172
|
if __FILE__ == $0
|
173
|
-
|
173
|
+
# :nocov:
|
174
174
|
SocialCrawler::SocialCrawler.new.crawl(ARGV[0], ARGV[1], ARGV[2])
|
175
|
-
|
175
|
+
# :nocov:
|
176
176
|
end
|
data/test/test_crawler.rb
CHANGED
@@ -20,6 +20,9 @@ class CrawlewrTest < Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
sc = SocialCrawler::SocialCrawler.new
|
22
22
|
sc.crawl('test/test_url.txt', '/tmp/test_out.txt', '/tmp/test_status.txt')
|
23
|
+
|
24
|
+
sc = SocialCrawler::SocialCrawler.new
|
25
|
+
sc.crawl('test/test_url.txt', '/tmp/test_out.txt', 'test_status.txt')
|
23
26
|
end
|
24
27
|
|
25
28
|
end
|
data/test_status.txt
ADDED
metadata
CHANGED
@@ -1,94 +1,83 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: socialcrawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Ivica Ceraj
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-26 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bundler
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '1.7'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- - ~>
|
24
|
+
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '1.7'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: rake
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- - ~>
|
31
|
+
- - "~>"
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '10.0'
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- - ~>
|
38
|
+
- - "~>"
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '10.0'
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: semantic
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
|
-
- - ~>
|
45
|
+
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
47
|
version: '1.0'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
|
-
- - ~>
|
52
|
+
- - "~>"
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '1.0'
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: simplecov
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
|
-
- - ~>
|
59
|
+
- - "~>"
|
68
60
|
- !ruby/object:Gem::Version
|
69
61
|
version: '0.9'
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
|
-
- - ~>
|
66
|
+
- - "~>"
|
76
67
|
- !ruby/object:Gem::Version
|
77
68
|
version: '0.9'
|
78
69
|
- !ruby/object:Gem::Dependency
|
79
70
|
name: simplecov-html
|
80
71
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
72
|
requirements:
|
83
|
-
- - ~>
|
73
|
+
- - "~>"
|
84
74
|
- !ruby/object:Gem::Version
|
85
75
|
version: '0.8'
|
86
76
|
type: :development
|
87
77
|
prerelease: false
|
88
78
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
79
|
requirements:
|
91
|
-
- - ~>
|
80
|
+
- - "~>"
|
92
81
|
- !ruby/object:Gem::Version
|
93
82
|
version: '0.8'
|
94
83
|
description: It read file containing list of urls and produces output file with domain,
|
@@ -99,9 +88,9 @@ executables: []
|
|
99
88
|
extensions: []
|
100
89
|
extra_rdoc_files: []
|
101
90
|
files:
|
102
|
-
- .coveralls.yml
|
103
|
-
- .gitignore
|
104
|
-
- .travis.yml
|
91
|
+
- ".coveralls.yml"
|
92
|
+
- ".gitignore"
|
93
|
+
- ".travis.yml"
|
105
94
|
- Gemfile
|
106
95
|
- LICENSE.txt
|
107
96
|
- README.md
|
@@ -112,30 +101,30 @@ files:
|
|
112
101
|
- test/test_crawler.rb
|
113
102
|
- test/test_helper.rb
|
114
103
|
- test/test_url.txt
|
104
|
+
- test_status.txt
|
115
105
|
homepage: http://github.com/iceraj/socialcrawler
|
116
106
|
licenses:
|
117
107
|
- LGPL 2.1
|
108
|
+
metadata: {}
|
118
109
|
post_install_message:
|
119
110
|
rdoc_options: []
|
120
111
|
require_paths:
|
121
112
|
- lib
|
122
113
|
required_ruby_version: !ruby/object:Gem::Requirement
|
123
|
-
none: false
|
124
114
|
requirements:
|
125
|
-
- -
|
115
|
+
- - ">="
|
126
116
|
- !ruby/object:Gem::Version
|
127
117
|
version: '0'
|
128
118
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
129
|
-
none: false
|
130
119
|
requirements:
|
131
|
-
- -
|
120
|
+
- - ">="
|
132
121
|
- !ruby/object:Gem::Version
|
133
122
|
version: '0'
|
134
123
|
requirements: []
|
135
124
|
rubyforge_project:
|
136
|
-
rubygems_version:
|
125
|
+
rubygems_version: 2.4.5
|
137
126
|
signing_key:
|
138
|
-
specification_version:
|
127
|
+
specification_version: 4
|
139
128
|
summary: SocialCrawler looks for social media links for different sites
|
140
129
|
test_files:
|
141
130
|
- test/test_crawler.rb
|