shared_count-cli 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +9 -2
- data/bin/shared_count-cli +1 -0
- data/lib/shared_count/cli/version.rb +1 -1
- data/lib/shared_count/cli.rb +94 -16
- data/spec/shared_count/cli_spec.rb +7 -6
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dbac3484620be6939c0dd0941d049ae62c693901
|
4
|
+
data.tar.gz: af1587d6aef436efdd9aa2d07eb4b3a9073fd27b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 08ea24413ded85fa2ab27f187769e178459dcaf64ee67690c60e6670c090b1fda30de1d7422abab6635ab45ad4b4f0eaa704b02075f99987b1a8f6eb9c3c4fe8
|
7
|
+
data.tar.gz: 35fd4f6093cab45615bde109e1ae42fda27614a798c950375acdd9bba7ee8668f9773ac52ff30169f5ff7c16283f37962f5981415aeb91fbe2009ebb05e8f029
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -13,13 +13,20 @@ CLI to the shared_count_api gem
|
|
13
13
|
* Print the CSV to stdout
|
14
14
|
|
15
15
|
```ruby
|
16
|
-
$
|
16
|
+
$ shared_count-cli ~/Desktop/file?.txt # => ~/Desktop/file1.txt ~/Desktop/file2.txt
|
17
17
|
```
|
18
18
|
|
19
19
|
* Redirect the CSV output to a file
|
20
20
|
|
21
21
|
```ruby
|
22
|
-
$
|
22
|
+
$ shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
|
23
|
+
```
|
24
|
+
|
25
|
+
* Launch it in debug mode
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
$ DEBUG=true shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
|
29
|
+
$ tail -f shared_count-cli.log
|
23
30
|
```
|
24
31
|
|
25
32
|
## Contributing
|
data/bin/shared_count-cli
CHANGED
data/lib/shared_count/cli.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
require "csv"
|
2
|
+
require "logger"
|
3
|
+
require "thread"
|
4
|
+
require "uri"
|
2
5
|
require "shared_count_api"
|
3
6
|
|
4
7
|
require_relative "../../config/initializers/dotenv"
|
@@ -8,36 +11,111 @@ require_relative "cli/version"
|
|
8
11
|
|
9
12
|
module SharedCount
|
10
13
|
module Cli
|
14
|
+
JOIN_TIMEOUT = 5 # seconds; limit passed to Thread#join for each worker thread
|
15
|
+
SLEEP_TIME = 2.5 # seconds; pause before retrying a request that raised an unexpected error
|
16
|
+
MAX_RETRIES = 3 # retries after unexpected errors before a worker re-queues the URL and stops
|
17
|
+
LINES_PER_ITERATION = 1000 # number of input lines (URLs) handled per iteration
|
18
|
+
MAX_CONCURRENCY = 50 # upper bound on worker threads started per iteration
|
19
|
+
|
11
20
|
class << self
|
12
21
|
# Looks up SharedCount metrics for every URL in +lines+ and renders them as CSV.
#
# The input is processed in batches of LINES_PER_ITERATION; each batch is
# drained by at most MAX_CONCURRENCY worker threads. Every URL is reduced to
# "scheme://host" before it is sent to the API. The input strings are never
# mutated, so frozen strings are accepted.
#
# @param lines [Array<String>] URLs, one per element, optionally newline-terminated
# @return [String] CSV text: a header row, a blank row, then one row per URL
#   that produced a response (row order follows worker completion, not input order)
def run(lines)
  configure_shared_count_client

  results = Queue.new

  lines.each_slice(LINES_PER_ITERATION).with_index(1) do |batch, iteration|
    # Logged at ERROR level, so it is written even when the logger is not in DEBUG mode.
    logger.error "Iteration ##{iteration}"
    process_batch(batch, results)
  end

  CSV.generate do |csv|
    csv << %w(URL StumbleUpon Reddit Delicious GooglePlusOne Buzz Twitter Diggs Pinterest LinkedIn commentsbox_count click_count total_count comment_count like_count share_count)
    csv << []
    csv << results.pop until results.empty?
  end
end

# Runs one batch of URLs through a pool of worker threads and waits for them.
#
# @param batch [Array<String>] the raw input lines of this batch
# @param results [Queue] receives one row (Array) per successful lookup
def process_batch(batch, results)
  queue = Queue.new
  batch.each { |line| queue.push(line) }

  # Size the pool from this batch, not from the whole input.
  worker_count = [MAX_CONCURRENCY, batch.length].min
  workers = Array.new(worker_count) do |index|
    Thread.new(index) { |worker| drain_queue(queue, results, worker) }
  end

  workers.each_with_index do |thread, index|
    begin
      # Thread#join returns nil when the limit expires and re-raises any
      # exception that terminated the thread.
      next if thread.join(JOIN_TIMEOUT)
      logger.error "[Thread ##{index}] - still running after #{JOIN_TIMEOUT} seconds, its results may be missing from the output"
    rescue => err
      logger.error "[Thread ##{index}] - error while joining main thread: #{err.inspect}"
      logger.error "[Thread ##{index}] - #{err.backtrace.join("\n")}"
    end
  end
end

# Worker body: takes URLs off +queue+ until it is empty, pushing one row per
# successful API response onto +results+.
#
# @param queue [Queue] pending URLs of the current batch
# @param results [Queue] shared output queue
# @param worker [Integer] worker index, used only in log messages
def drain_queue(queue, results, worker)
  while (line = pop_or_nil(queue))
    url = normalize_url(line)
    unless url
      logger.error "[Thread ##{worker}] - skipping '#{line.chomp}': no scheme and host could be extracted"
      next
    end

    # Counted per URL so failures on one URL do not eat into the next one's retries.
    failures = 0
    begin
      response = SharedCountApi::Client.new(url).response
    rescue SharedCountApi::Error
      logger.error "[Thread ##{worker}] - error while processing '#{url}'"
      response = nil
    rescue => err
      logger.error "[Thread ##{worker}] - error while processing '#{url}', retry: ##{failures} - #{err.inspect}"
      failures += 1
      sleep(SLEEP_TIME)
      retry if failures <= MAX_RETRIES

      # Retries exhausted: hand the URL back for another worker and stop this one.
      queue.push(url)
      break
    end

    if response
      logger.debug "[Thread ##{worker}] - #{url}"

      facebook_metrics = response.delete("Facebook")
      facebook_metrics = {} unless facebook_metrics.is_a?(Hash)
      results.push([url, *response.values, *facebook_metrics.values])
    else
      logger.warn "[Thread ##{worker}] - no response for '#{url}'"
    end
  end
end

# Reduces an input line to "scheme://host" without mutating it.
#
# Falls back to a regular expression when URI cannot parse the line or
# reports no host, so a URL with e.g. a space in its path is still usable.
#
# @param line [String] raw input line
# @return [String, nil] the normalized URL, or nil when scheme or host is missing
def normalize_url(line)
  url = line.chomp
  uri = begin
    URI(url)
  rescue URI::InvalidURIError
    nil
  end
  fallback = url.match(%r{\A(https?)://([^/]+)})

  scheme = (uri && uri.scheme) || (fallback && fallback[1])
  host = (uri && uri.host) || (fallback && fallback[2])
  return nil if scheme.to_s.empty? || host.to_s.empty?

  "#{scheme}://#{host}"
end

# Non-blocking Queue#pop that returns nil instead of raising when the queue is empty.
def pop_or_nil(queue)
  queue.pop(true)
rescue ThreadError
  nil
end

private :process_batch, :drain_queue, :normalize_url, :pop_or_nil
|
38
110
|
|
39
111
|
private
|
40
112
|
|
113
|
+
# Memoized Logger writing to "shared_count-cli.log" (relative to the current
# working directory).
#
# The level is DEBUG when the DEBUG environment variable holds a truthy value
# (e.g. "true", "1") and ERROR otherwise. Unset, empty, "0", "false", "no" and
# "off" count as disabled, so `DEBUG=false` no longer switches debug logging on.
#
# @return [Logger]
def logger
  @logger ||= Logger.new("shared_count-cli.log").tap do |log|
    disabled = ["", "0", "false", "no", "off"].include?(ENV["DEBUG"].to_s.strip.downcase)
    log.level = disabled ? Logger::ERROR : Logger::DEBUG
  end
end
|
118
|
+
|
41
119
|
def configure_shared_count_client
|
42
120
|
SharedCountApi.configure do |config|
|
43
121
|
config.apikey = ENV["SHARED_COUNT_APIKEY"]
|
@@ -4,18 +4,19 @@ require_relative "../../lib/shared_count/cli"
|
|
4
4
|
|
5
5
|
# Spec for SharedCount::Cli.run.
# NOTE(review): nothing here stubs the API client, so this presumably issues
# real SharedCount requests and needs a valid API key — confirm.
describe SharedCount::Cli do
  describe ".run" do
    let(:lines) do
      ["http://slashdot.org\n", "http://bbc.co.uk\n", "http://www.lanacion.com.ar\n", "http://www.theguardian.com\n"]
    end

    it "queries the SharedCount API for each URL passed in" do
      csv = SharedCount::Cli.run(lines)

      # Rows come back in worker-completion order, so compare against the whole
      # URL column instead of fixed row positions.
      urls = CSV.parse(csv, headers: :first_row).map { |row| row["URL"] }
      lines.each do |line|
        urls.must_include line.chomp
      end
    end
  end
end
|