shared_count-cli 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -1
- data/bin/shared_count-cli +38 -0
- data/lib/shared_count/cli.rb +19 -3
- data/lib/shared_count/cli/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1fe6bc3a8a598572968a5b70e68f8a7fd534d272
|
4
|
+
data.tar.gz: 27bf9e7bac99f698e0b8b453232615b5cfd747c4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: facacde3a1c30afee7008f08dd4760f0b04cd7d4778a4b74433e46190fab6b6b591d01fff8d367ddbc53e7d69e5109a70d18a82ee8ee4fb105c2e18ad87deda2
|
7
|
+
data.tar.gz: 61adc0fac22c6c50c90a0b6f48453810684b362233ee1ee889de6f80f525da26b05283af0df9f1b5ec76f19b4473ae52ee49289035a4312cc8a99a7e5ce3c39d
|
data/README.md
CHANGED
@@ -22,13 +22,19 @@ $ shared_count-cli ~/Desktop/file?.txt # => ~/Desktop/file1.txt ~/Desktop/file2.
|
|
22
22
|
$ shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
|
23
23
|
```
|
24
24
|
|
25
|
-
*
|
25
|
+
* Run it in debug mode
|
26
26
|
|
27
27
|
```ruby
|
28
28
|
$ DEBUG=true shared_count-cli ~/Desktop/file?.txt > ~/Desktop/output.csv
|
29
29
|
$ tail -f shared_count-cli.log
|
30
30
|
```
|
31
31
|
|
32
|
+
* For more fine control see the available command line options
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
$ shared_count-cli --help
|
36
|
+
```
|
37
|
+
|
32
38
|
## Contributing
|
33
39
|
|
34
40
|
1. Fork it ( http://github.com/<my-github-username>/shared_count-cli/fork )
|
data/bin/shared_count-cli
CHANGED
@@ -1,7 +1,45 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require "getoptlong"
|
4
|
+
|
5
|
+
opts = GetoptLong.new(
|
6
|
+
['--help', '-h', GetoptLong::NO_ARGUMENT],
|
7
|
+
['--concurrency', '-c', GetoptLong::REQUIRED_ARGUMENT],
|
8
|
+
['--iteration-size', GetoptLong::REQUIRED_ARGUMENT]
|
9
|
+
)
|
10
|
+
|
11
|
+
concurrency, iteration_size = nil
|
12
|
+
opts.each do |opt, arg|
|
13
|
+
case opt
|
14
|
+
when '--help'
|
15
|
+
puts <<-EOF
|
16
|
+
shared_count-cli [OPTION] ... file1 [file2]
|
17
|
+
|
18
|
+
-h, --help:
|
19
|
+
show help
|
20
|
+
|
21
|
+
--concurrency n, -c n:
|
22
|
+
use at most n threads, defaults to 50
|
23
|
+
|
24
|
+
--iteration-size n:
|
25
|
+
process at most n URLs per iteration (group of x threads), defaults to 1000
|
26
|
+
|
27
|
+
EOF
|
28
|
+
exit
|
29
|
+
when '--concurrency'
|
30
|
+
concurrency = arg.to_i
|
31
|
+
when '--iteration-size'
|
32
|
+
iteration_size = arg.to_i
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
3
36
|
require_relative "../lib/shared_count/cli"
|
4
37
|
|
38
|
+
SharedCount::Cli.configure do |config|
|
39
|
+
config.concurrency = concurrency
|
40
|
+
config.iteration_size = iteration_size
|
41
|
+
end
|
42
|
+
|
5
43
|
urls = ARGF.readlines
|
6
44
|
urls.reject! { |url| url.empty? }
|
7
45
|
urls.map! { |url| url.start_with?("http") ? url : "http://#{url}" }
|
data/lib/shared_count/cli.rb
CHANGED
@@ -18,18 +18,22 @@ module SharedCount
|
|
18
18
|
MAX_CONCURRENCY = 50
|
19
19
|
|
20
20
|
class << self
|
21
|
+
attr_writer :concurrency, :iteration_size
|
22
|
+
|
21
23
|
def run(lines)
|
22
24
|
configure_shared_count_client
|
25
|
+
logger.info "Using #{concurrency} threads"
|
26
|
+
logger.info "The iteration size is #{iteration_size} URLs"
|
23
27
|
|
24
|
-
iterations, mod = lines.length.divmod(LINES_PER_ITERATION)
|
28
|
+
iterations, mod = lines.length.divmod(iteration_size)
|
25
29
|
iterations += 1 if mod > 0
|
26
30
|
results = Queue.new
|
27
31
|
|
28
32
|
iterations.times do |iteration|
|
29
33
|
logger.error "Iteration ##{iteration + 1}"
|
30
34
|
queue = Queue.new
|
31
|
-
from = LINES_PER_ITERATION * iteration
|
32
|
-
lines[from, LINES_PER_ITERATION].each { |url| queue.push(url) }
|
35
|
+
from = iteration_size * iteration
|
36
|
+
lines[from, iteration_size].each { |url| queue.push(url) }
|
33
37
|
thread_count = [MAX_CONCURRENCY, lines.length].min
|
34
38
|
|
35
39
|
threads = (0...thread_count).map do |thread|
|
@@ -108,6 +112,18 @@ module SharedCount
|
|
108
112
|
end
|
109
113
|
end
|
110
114
|
|
115
|
+
def configure
|
116
|
+
yield self
|
117
|
+
end
|
118
|
+
|
119
|
+
def concurrency
|
120
|
+
@concurrency ||= MAX_CONCURRENCY
|
121
|
+
end
|
122
|
+
|
123
|
+
def iteration_size
|
124
|
+
@iteration_size ||= LINES_PER_ITERATION
|
125
|
+
end
|
126
|
+
|
111
127
|
private
|
112
128
|
|
113
129
|
def logger
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: shared_count-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Cristian Rasch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: shared_count_api
|