shared_count-cli 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0c3111abeaf8c7634190475c25146259569027a4
4
- data.tar.gz: 52a4ad77c5fd35502554a1a3a872f9b4f07b0a0e
3
+ metadata.gz: dbac3484620be6939c0dd0941d049ae62c693901
4
+ data.tar.gz: af1587d6aef436efdd9aa2d07eb4b3a9073fd27b
5
5
  SHA512:
6
- metadata.gz: e91498ba39a144ba4478a6d8be707d01937b4d6d245105ddcceb7a49fa3457b880c7d009f7fe3045a962d85ff69cd8442631c7ad003ef4e6ab372550e4af19cb
7
- data.tar.gz: 53d75baa64822207eda1fdbae2ba4dd78a49a0a5bfeb1600bd5e8095d032bdc65a98917f59cfdbe1c570f70466f1de462686ef50b763ad2180500ec537df9402
6
+ metadata.gz: 08ea24413ded85fa2ab27f187769e178459dcaf64ee67690c60e6670c090b1fda30de1d7422abab6635ab45ad4b4f0eaa704b02075f99987b1a8f6eb9c3c4fe8
7
+ data.tar.gz: 35fd4f6093cab45615bde109e1ae42fda27614a798c950375acdd9bba7ee8668f9773ac52ff30169f5ff7c16283f37962f5981415aeb91fbe2009ebb05e8f029
data/.gitignore CHANGED
@@ -17,3 +17,4 @@ test/version_tmp
17
17
  tmp
18
18
  .env
19
19
  .rbenv-gemsets
20
+ *.log
data/README.md CHANGED
@@ -13,13 +13,20 @@ CLI to the shared_count_api gem
13
13
  * Redirect the CSV file to stdout
14
14
 
15
15
  ```ruby
16
- $ ruby bin/shared_count-cli ~/Desktop/file?.txt # => ~/Desktop/file1.txt ~/Desktop/file2.txt
16
+ $ shared_count-cli ~/Desktop/file?.txt # => ~/Desktop/file1.txt ~/Desktop/file2.txt
17
17
  ```
18
18
 
19
19
  * Redirect the CSV file to a file
20
20
 
21
21
  ```ruby
22
- $ ruby bin/shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
22
+ $ shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
23
+ ```
24
+
25
+ * Launch it in debug mode
26
+
27
+ ```ruby
28
+ $ DEBUG=true shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
29
+ $ tail -f shared_count-cli.log
23
30
  ```
24
31
 
25
32
  ## Contributing
data/bin/shared_count-cli CHANGED
@@ -3,6 +3,7 @@
3
3
  require_relative "../lib/shared_count/cli"
4
4
 
5
5
  urls = ARGF.readlines
6
+ urls.reject! { |url| url.empty? }
6
7
  urls.map! { |url| url.start_with?("http") ? url : "http://#{url}" }
7
8
 
8
9
  csv = SharedCount::Cli.run(urls)
@@ -1,5 +1,5 @@
1
1
  module SharedCount
2
2
  module Cli
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
@@ -1,4 +1,7 @@
1
1
  require "csv"
2
+ require "logger"
3
+ require "thread"
4
+ require "uri"
2
5
  require "shared_count_api"
3
6
 
4
7
  require_relative "../../config/initializers/dotenv"
@@ -8,36 +11,111 @@ require_relative "cli/version"
8
11
 
9
12
  module SharedCount
10
13
  module Cli
14
+ JOIN_TIMEOUT = 5 # seconds
15
+ SLEEP_TIME = 2.5 # seconds
16
+ MAX_RETRIES = 3
17
+ LINES_PER_ITERATION = 1000
18
+ MAX_CONCURRENCY = 50
19
+
11
20
  class << self
12
21
  def run(lines)
13
22
  configure_shared_count_client
14
23
 
15
- CSV.generate do |csv|
16
- lines.each_with_index do |url, i|
17
- url.chomp!
18
- response = nil
19
- begin
20
- response = SharedCountApi::Client.new(url).response
21
- rescue SharedCountApi::Error
22
- next
24
+ iterations, mod = lines.length.divmod(LINES_PER_ITERATION)
25
+ iterations += 1 if mod > 0
26
+ results = Queue.new
27
+
28
+ iterations.times do |iteration|
29
+ logger.error "Iteration ##{iteration + 1}"
30
+ queue = Queue.new
31
+ from = LINES_PER_ITERATION * iteration
32
+ lines[from, LINES_PER_ITERATION].each { |url| queue.push(url) }
33
+ thread_count = [MAX_CONCURRENCY, lines.length].min
34
+
35
+ threads = (0...thread_count).map do |thread|
36
+ Thread.new(thread) do |thread|
37
+ error = 0
38
+
39
+ url = begin
40
+ queue.pop(true)
41
+ rescue ThreadError; end
42
+
43
+ while url do
44
+ url.chomp!
45
+ uri = URI(url)
46
+ host = uri.host || url[/\Ahttps?:\/\/([^\/]+)/, 1]
47
+ url = "#{uri.scheme}://#{host}"
48
+
49
+ response = nil
50
+ begin
51
+ response = SharedCountApi::Client.new(url).response
52
+ rescue SharedCountApi::Error
53
+ logger.error "[Thread ##{thread}] - error while processing '#{url}'"
54
+ rescue => err
55
+ logger.error "[Thread ##{thread}] - error while processing '#{url}', retry: ##{error} - #{err.inspect}"
56
+ error += 1
57
+ sleep(SLEEP_TIME)
58
+ if error <= MAX_RETRIES
59
+ retry
60
+ else
61
+ queue.push(url)
62
+ break
63
+ end
64
+ else
65
+ error = 0
66
+ end
67
+
68
+ if response
69
+ logger.debug "[Thread ##{thread}] - #{url}"
70
+
71
+ facebook_metrics = response.delete("Facebook")
72
+ facebook_metrics = {} unless facebook_metrics.is_a?(Hash)
73
+ values = response.values.unshift(url)
74
+ results.push(values.concat(facebook_metrics.values))
75
+ else
76
+ logger.warn "[Thread ##{thread}] - no response for '#{url}'"
77
+ end
78
+
79
+ url = begin
80
+ queue.pop(true)
81
+ rescue ThreadError; end
82
+ end
23
83
  end
84
+ end
24
85
 
25
- facebook_metrics = response.delete("Facebook")
26
- if i.zero?
27
- keys = response.keys.unshift("URL")
28
- headers = keys.concat(facebook_metrics.keys)
29
- csv << headers
30
- csv << []
86
+ threads.each do |thread|
87
+ begin
88
+ thread.join(JOIN_TIMEOUT)
89
+ rescue => err
90
+ logger.error "[Thread ##{thread}] - error while joining main thread: #{err.inspect}"
91
+ logger.error "[Thread ##{thread}] - #{err.backtrace.join("\n")}"
31
92
  end
93
+ end
94
+ end
95
+
32
96
 
33
- values = response.values.unshift(url)
34
- csv << values.concat(facebook_metrics.values)
97
+ CSV.generate do |csv|
98
+ csv << %w(URL StumbleUpon Reddit Delicious GooglePlusOne Buzz Twitter Diggs Pinterest LinkedIn commentsbox_count click_count total_count comment_count like_count share_count)
99
+ csv << []
100
+ loop do
101
+ begin
102
+ arr = results.pop(true)
103
+ csv << arr
104
+ rescue ThreadError
105
+ break
106
+ end
35
107
  end
36
108
  end
37
109
  end
38
110
 
39
111
  private
40
112
 
113
+ def logger
114
+ @logger ||= Logger.new("shared_count-cli.log").tap do |logger|
115
+ logger.level = ENV["DEBUG"] ? Logger::DEBUG : Logger::ERROR
116
+ end
117
+ end
118
+
41
119
  def configure_shared_count_client
42
120
  SharedCountApi.configure do |config|
43
121
  config.apikey = ENV["SHARED_COUNT_APIKEY"]
@@ -4,18 +4,19 @@ require_relative "../../lib/shared_count/cli"
4
4
 
5
5
  describe SharedCount::Cli do
6
6
  describe ".run" do
7
- let(:urls) do
7
+ let(:lines) do
8
8
  ["http://slashdot.org\n", "http://bbc.co.uk\n", "http://www.lanacion.com.ar\n", "http://www.theguardian.com\n"]
9
9
  end
10
10
 
11
11
  it "queries the SharedCount API for each URL passed in" do
12
- csv = SharedCount::Cli.run(urls)
12
+ csv = SharedCount::Cli.run(lines)
13
13
 
14
14
  arr = CSV.parse(csv, headers: :first_row)
15
- arr[1]["URL"].must_equal "http://slashdot.org"
16
- arr[2]["URL"].must_equal "http://bbc.co.uk"
17
- arr[3]["URL"].must_equal "http://www.lanacion.com.ar"
18
- arr[4]["URL"].must_equal "http://www.theguardian.com"
15
+ urls = [arr[1]["URL"], arr[2]["URL"], arr[3]["URL"], arr[4]["URL"]]
16
+ lines.each do |line|
17
+ line.chomp!
18
+ urls.any? { |url| url == line }.must_be_same_as true
19
+ end
19
20
  end
20
21
  end
21
22
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shared_count-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristian Rasch