shared_count-cli 0.0.1 → 0.0.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0c3111abeaf8c7634190475c25146259569027a4
4
- data.tar.gz: 52a4ad77c5fd35502554a1a3a872f9b4f07b0a0e
3
+ metadata.gz: dbac3484620be6939c0dd0941d049ae62c693901
4
+ data.tar.gz: af1587d6aef436efdd9aa2d07eb4b3a9073fd27b
5
5
  SHA512:
6
- metadata.gz: e91498ba39a144ba4478a6d8be707d01937b4d6d245105ddcceb7a49fa3457b880c7d009f7fe3045a962d85ff69cd8442631c7ad003ef4e6ab372550e4af19cb
7
- data.tar.gz: 53d75baa64822207eda1fdbae2ba4dd78a49a0a5bfeb1600bd5e8095d032bdc65a98917f59cfdbe1c570f70466f1de462686ef50b763ad2180500ec537df9402
6
+ metadata.gz: 08ea24413ded85fa2ab27f187769e178459dcaf64ee67690c60e6670c090b1fda30de1d7422abab6635ab45ad4b4f0eaa704b02075f99987b1a8f6eb9c3c4fe8
7
+ data.tar.gz: 35fd4f6093cab45615bde109e1ae42fda27614a798c950375acdd9bba7ee8668f9773ac52ff30169f5ff7c16283f37962f5981415aeb91fbe2009ebb05e8f029
data/.gitignore CHANGED
@@ -17,3 +17,4 @@ test/version_tmp
17
17
  tmp
18
18
  .env
19
19
  .rbenv-gemsets
20
+ *.log
data/README.md CHANGED
@@ -13,13 +13,20 @@ CLI to the shared_count_api gem
13
13
  * Redirect the CSV file to stdout
14
14
 
15
15
  ```ruby
16
- $ ruby bin/shared_count-cli ~/Desktop/file?.txt # => ~/Desktop/file1.txt ~/Desktop/file2.txt
16
+ $ shared_count-cli ~/Desktop/file?.txt # => ~/Desktop/file1.txt ~/Desktop/file2.txt
17
17
  ```
18
18
 
19
19
  * Redirect the CSV file to a file
20
20
 
21
21
  ```ruby
22
- $ ruby bin/shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
22
+ $ shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
23
+ ```
24
+
25
+ * Launch it in debug mode
26
+
27
+ ```ruby
28
+ $ DEBUG=true shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
29
+ $ tail -f shared_count-cli.log
23
30
  ```
24
31
 
25
32
  ## Contributing
data/bin/shared_count-cli CHANGED
@@ -3,6 +3,7 @@
3
3
  require_relative "../lib/shared_count/cli"
4
4
 
5
5
  urls = ARGF.readlines
6
+ urls.reject! { |url| url.empty? }
6
7
  urls.map! { |url| url.start_with?("http") ? url : "http://#{url}" }
7
8
 
8
9
  csv = SharedCount::Cli.run(urls)
data/lib/shared_count/cli/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module SharedCount
2
2
  module Cli
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
data/lib/shared_count/cli.rb CHANGED
@@ -1,4 +1,7 @@
1
1
  require "csv"
2
+ require "logger"
3
+ require "thread"
4
+ require "uri"
2
5
  require "shared_count_api"
3
6
 
4
7
  require_relative "../../config/initializers/dotenv"
@@ -8,36 +11,111 @@ require_relative "cli/version"
8
11
 
9
12
  module SharedCount
10
13
  module Cli
14
+ JOIN_TIMEOUT = 5 # seconds
15
+ SLEEP_TIME = 2.5 # seconds
16
+ MAX_RETRIES = 3
17
+ LINES_PER_ITERATION = 1000
18
+ MAX_CONCURRENCY = 50
19
+
11
20
  class << self
12
21
  def run(lines)
13
22
  configure_shared_count_client
14
23
 
15
- CSV.generate do |csv|
16
- lines.each_with_index do |url, i|
17
- url.chomp!
18
- response = nil
19
- begin
20
- response = SharedCountApi::Client.new(url).response
21
- rescue SharedCountApi::Error
22
- next
24
+ iterations, mod = lines.length.divmod(LINES_PER_ITERATION)
25
+ iterations += 1 if mod > 0
26
+ results = Queue.new
27
+
28
+ iterations.times do |iteration|
29
+ logger.error "Iteration ##{iteration + 1}"
30
+ queue = Queue.new
31
+ from = LINES_PER_ITERATION * iteration
32
+ lines[from, LINES_PER_ITERATION].each { |url| queue.push(url) }
33
+ thread_count = [MAX_CONCURRENCY, lines.length].min
34
+
35
+ threads = (0...thread_count).map do |thread|
36
+ Thread.new(thread) do |thread|
37
+ error = 0
38
+
39
+ url = begin
40
+ queue.pop(true)
41
+ rescue ThreadError; end
42
+
43
+ while url do
44
+ url.chomp!
45
+ uri = URI(url)
46
+ host = uri.host || url[/\Ahttps?:\/\/([^\/]+)/, 1]
47
+ url = "#{uri.scheme}://#{host}"
48
+
49
+ response = nil
50
+ begin
51
+ response = SharedCountApi::Client.new(url).response
52
+ rescue SharedCountApi::Error
53
+ logger.error "[Thread ##{thread}] - error while processing '#{url}'"
54
+ rescue => err
55
+ logger.error "[Thread ##{thread}] - error while processing '#{url}', retry: ##{error} - #{err.inspect}"
56
+ error += 1
57
+ sleep(SLEEP_TIME)
58
+ if error <= MAX_RETRIES
59
+ retry
60
+ else
61
+ queue.push(url)
62
+ break
63
+ end
64
+ else
65
+ error = 0
66
+ end
67
+
68
+ if response
69
+ logger.debug "[Thread ##{thread}] - #{url}"
70
+
71
+ facebook_metrics = response.delete("Facebook")
72
+ facebook_metrics = {} unless facebook_metrics.is_a?(Hash)
73
+ values = response.values.unshift(url)
74
+ results.push(values.concat(facebook_metrics.values))
75
+ else
76
+ logger.warn "[Thread ##{thread}] - no response for '#{url}'"
77
+ end
78
+
79
+ url = begin
80
+ queue.pop(true)
81
+ rescue ThreadError; end
82
+ end
23
83
  end
84
+ end
24
85
 
25
- facebook_metrics = response.delete("Facebook")
26
- if i.zero?
27
- keys = response.keys.unshift("URL")
28
- headers = keys.concat(facebook_metrics.keys)
29
- csv << headers
30
- csv << []
86
+ threads.each do |thread|
87
+ begin
88
+ thread.join(JOIN_TIMEOUT)
89
+ rescue => err
90
+ logger.error "[Thread ##{thread}] - error while joining main thread: #{err.inspect}"
91
+ logger.error "[Thread ##{thread}] - #{err.backtrace.join("\n")}"
31
92
  end
93
+ end
94
+ end
95
+
32
96
 
33
- values = response.values.unshift(url)
34
- csv << values.concat(facebook_metrics.values)
97
+ CSV.generate do |csv|
98
+ csv << %w(URL StumbleUpon Reddit Delicious GooglePlusOne Buzz Twitter Diggs Pinterest LinkedIn commentsbox_count click_count total_count comment_count like_count share_count)
99
+ csv << []
100
+ loop do
101
+ begin
102
+ arr = results.pop(true)
103
+ csv << arr
104
+ rescue ThreadError
105
+ break
106
+ end
35
107
  end
36
108
  end
37
109
  end
38
110
 
39
111
  private
40
112
 
113
+ def logger
114
+ @logger ||= Logger.new("shared_count-cli.log").tap do |logger|
115
+ logger.level = ENV["DEBUG"] ? Logger::DEBUG : Logger::ERROR
116
+ end
117
+ end
118
+
41
119
  def configure_shared_count_client
42
120
  SharedCountApi.configure do |config|
43
121
  config.apikey = ENV["SHARED_COUNT_APIKEY"]
@@ -4,18 +4,19 @@ require_relative "../../lib/shared_count/cli"
4
4
 
5
5
  describe SharedCount::Cli do
6
6
  describe ".run" do
7
- let(:urls) do
7
+ let(:lines) do
8
8
  ["http://slashdot.org\n", "http://bbc.co.uk\n", "http://www.lanacion.com.ar\n", "http://www.theguardian.com\n"]
9
9
  end
10
10
 
11
11
  it "queries the SharedCount API for each URL passed in" do
12
- csv = SharedCount::Cli.run(urls)
12
+ csv = SharedCount::Cli.run(lines)
13
13
 
14
14
  arr = CSV.parse(csv, headers: :first_row)
15
- arr[1]["URL"].must_equal "http://slashdot.org"
16
- arr[2]["URL"].must_equal "http://bbc.co.uk"
17
- arr[3]["URL"].must_equal "http://www.lanacion.com.ar"
18
- arr[4]["URL"].must_equal "http://www.theguardian.com"
15
+ urls = [arr[1]["URL"], arr[2]["URL"], arr[3]["URL"], arr[4]["URL"]]
16
+ lines.each do |line|
17
+ line.chomp!
18
+ urls.any? { |url| url == line }.must_be_same_as true
19
+ end
19
20
  end
20
21
  end
21
22
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shared_count-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cristian Rasch