shared_count-cli 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +9 -2
- data/bin/shared_count-cli +1 -0
- data/lib/shared_count/cli/version.rb +1 -1
- data/lib/shared_count/cli.rb +94 -16
- data/spec/shared_count/cli_spec.rb +7 -6
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dbac3484620be6939c0dd0941d049ae62c693901
|
4
|
+
data.tar.gz: af1587d6aef436efdd9aa2d07eb4b3a9073fd27b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 08ea24413ded85fa2ab27f187769e178459dcaf64ee67690c60e6670c090b1fda30de1d7422abab6635ab45ad4b4f0eaa704b02075f99987b1a8f6eb9c3c4fe8
|
7
|
+
data.tar.gz: 35fd4f6093cab45615bde109e1ae42fda27614a798c950375acdd9bba7ee8668f9773ac52ff30169f5ff7c16283f37962f5981415aeb91fbe2009ebb05e8f029
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -13,13 +13,20 @@ CLI to the shared_count_api gem
|
|
13
13
|
* Print the CSV output to stdout
|
14
14
|
|
15
15
|
```sh
|
16
|
-
$
|
16
|
+
$ shared_count-cli ~/Desktop/file?.txt # => ~/Desktop/file1.txt ~/Desktop/file2.txt
|
17
17
|
```
|
18
18
|
|
19
19
|
* Redirect the CSV output to a file
|
20
20
|
|
21
21
|
```sh
|
22
|
-
$
|
22
|
+
$ shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
|
23
|
+
```
|
24
|
+
|
25
|
+
* Launch it in debug mode
|
26
|
+
|
27
|
+
```sh
|
28
|
+
$ DEBUG=true shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
|
29
|
+
$ tail -f shared_count-cli.log
|
23
30
|
```
|
24
31
|
|
25
32
|
## Contributing
|
data/bin/shared_count-cli
CHANGED
data/lib/shared_count/cli.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
1
|
require "csv"
|
2
|
+
require "logger"
|
3
|
+
require "thread"
|
4
|
+
require "uri"
|
2
5
|
require "shared_count_api"
|
3
6
|
|
4
7
|
require_relative "../../config/initializers/dotenv"
|
@@ -8,36 +11,111 @@ require_relative "cli/version"
|
|
8
11
|
|
9
12
|
module SharedCount
|
10
13
|
module Cli
|
14
|
+
JOIN_TIMEOUT = 5 # seconds
|
15
|
+
SLEEP_TIME = 2.5 # seconds
|
16
|
+
MAX_RETRIES = 3
|
17
|
+
LINES_PER_ITERATION = 1000
|
18
|
+
MAX_CONCURRENCY = 50
|
19
|
+
|
11
20
|
class << self
|
12
21
|
def run(lines)
|
13
22
|
configure_shared_count_client
|
14
23
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
24
|
+
iterations, mod = lines.length.divmod(LINES_PER_ITERATION)
|
25
|
+
iterations += 1 if mod > 0
|
26
|
+
results = Queue.new
|
27
|
+
|
28
|
+
iterations.times do |iteration|
|
29
|
+
logger.error "Iteration ##{iteration + 1}"
|
30
|
+
queue = Queue.new
|
31
|
+
from = LINES_PER_ITERATION * iteration
|
32
|
+
lines[from, LINES_PER_ITERATION].each { |url| queue.push(url) }
|
33
|
+
thread_count = [MAX_CONCURRENCY, lines.length].min
|
34
|
+
|
35
|
+
threads = (0...thread_count).map do |thread|
|
36
|
+
Thread.new(thread) do |thread|
|
37
|
+
error = 0
|
38
|
+
|
39
|
+
url = begin
|
40
|
+
queue.pop(true)
|
41
|
+
rescue ThreadError; end
|
42
|
+
|
43
|
+
while url do
|
44
|
+
url.chomp!
|
45
|
+
uri = URI(url)
|
46
|
+
host = uri.host || url[/\Ahttps?:\/\/([^\/]+)/, 1]
|
47
|
+
url = "#{uri.scheme}://#{host}"
|
48
|
+
|
49
|
+
response = nil
|
50
|
+
begin
|
51
|
+
response = SharedCountApi::Client.new(url).response
|
52
|
+
rescue SharedCountApi::Error
|
53
|
+
logger.error "[Thread ##{thread}] - error while processing '#{url}'"
|
54
|
+
rescue => err
|
55
|
+
logger.error "[Thread ##{thread}] - error while processing '#{url}', retry: ##{error} - #{err.inspect}"
|
56
|
+
error += 1
|
57
|
+
sleep(SLEEP_TIME)
|
58
|
+
if error <= MAX_RETRIES
|
59
|
+
retry
|
60
|
+
else
|
61
|
+
queue.push(url)
|
62
|
+
break
|
63
|
+
end
|
64
|
+
else
|
65
|
+
error = 0
|
66
|
+
end
|
67
|
+
|
68
|
+
if response
|
69
|
+
logger.debug "[Thread ##{thread}] - #{url}"
|
70
|
+
|
71
|
+
facebook_metrics = response.delete("Facebook")
|
72
|
+
facebook_metrics = {} unless facebook_metrics.is_a?(Hash)
|
73
|
+
values = response.values.unshift(url)
|
74
|
+
results.push(values.concat(facebook_metrics.values))
|
75
|
+
else
|
76
|
+
logger.warn "[Thread ##{thread}] - no response for '#{url}'"
|
77
|
+
end
|
78
|
+
|
79
|
+
url = begin
|
80
|
+
queue.pop(true)
|
81
|
+
rescue ThreadError; end
|
82
|
+
end
|
23
83
|
end
|
84
|
+
end
|
24
85
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
86
|
+
threads.each do |thread|
|
87
|
+
begin
|
88
|
+
thread.join(JOIN_TIMEOUT)
|
89
|
+
rescue => err
|
90
|
+
logger.error "[Thread ##{thread}] - error while joining main thread: #{err.inspect}"
|
91
|
+
logger.error "[Thread ##{thread}] - #{err.backtrace.join("\n")}"
|
31
92
|
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
32
96
|
|
33
|
-
|
34
|
-
|
97
|
+
CSV.generate do |csv|
|
98
|
+
csv << %w(URL StumbleUpon Reddit Delicious GooglePlusOne Buzz Twitter Diggs Pinterest LinkedIn commentsbox_count click_count total_count comment_count like_count share_count)
|
99
|
+
csv << []
|
100
|
+
loop do
|
101
|
+
begin
|
102
|
+
arr = results.pop(true)
|
103
|
+
csv << arr
|
104
|
+
rescue ThreadError
|
105
|
+
break
|
106
|
+
end
|
35
107
|
end
|
36
108
|
end
|
37
109
|
end
|
38
110
|
|
39
111
|
private
|
40
112
|
|
113
|
+
def logger
|
114
|
+
@logger ||= Logger.new("shared_count-cli.log").tap do |logger|
|
115
|
+
logger.level = ENV["DEBUG"] ? Logger::DEBUG : Logger::ERROR
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
41
119
|
def configure_shared_count_client
|
42
120
|
SharedCountApi.configure do |config|
|
43
121
|
config.apikey = ENV["SHARED_COUNT_APIKEY"]
|
@@ -4,18 +4,19 @@ require_relative "../../lib/shared_count/cli"
|
|
4
4
|
|
5
5
|
describe SharedCount::Cli do
|
6
6
|
describe ".run" do
|
7
|
-
let(:
|
7
|
+
let(:lines) do
|
8
8
|
["http://slashdot.org\n", "http://bbc.co.uk\n", "http://www.lanacion.com.ar\n", "http://www.theguardian.com\n"]
|
9
9
|
end
|
10
10
|
|
11
11
|
it "queries the SharedCount API for each URL passed in" do
|
12
|
-
csv = SharedCount::Cli.run(
|
12
|
+
csv = SharedCount::Cli.run(lines)
|
13
13
|
|
14
14
|
arr = CSV.parse(csv, headers: :first_row)
|
15
|
-
arr[1]["URL"]
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
urls = [arr[1]["URL"], arr[2]["URL"], arr[3]["URL"], arr[4]["URL"]]
|
16
|
+
lines.each do |line|
|
17
|
+
line.chomp!
|
18
|
+
urls.any? { |url| url == line }.must_be_same_as true
|
19
|
+
end
|
19
20
|
end
|
20
21
|
end
|
21
22
|
end
|