pcsv 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +4 -0
- data/lib/pcsv.rb +75 -0
- data/lib/pcsv/version.rb +3 -0
- data/test/pcsv_test.rb +45 -0
- data/test/test_helper.rb +18 -0
- metadata +116 -0
data/README.md
ADDED
data/lib/pcsv.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'pcsv/version'
|
3
|
+
|
4
|
+
class PCSV
  ##############################################################################
  #
  # Static Methods
  #
  ##############################################################################

  # Opens a CSV file and runs the block once per cell, using a pool of worker
  # threads. The block's return value is ignored.
  #
  # path    - Path of the CSV file to read.
  # options - Options forwarded to CSV.read, plus :thread_count (default 10).
  # block   - Called as block.call(item, mutex); see .process for details.
  #
  # Returns the parsed CSV (whatever CSV.read returned for the options).
  # Raises ArgumentError if no block is given.
  def self.each(path, options={}, &block)
    # `&Proc.new` (the previous way of capturing the caller's block) raises on
    # Ruby 3.0+, so the block is captured explicitly instead.
    raise ArgumentError, 'tried to call PCSV.each without a block' unless block
    process(:each, path, options, &block)
  end

  # Opens a CSV file and replaces every cell with the block's return value,
  # using a pool of worker threads.
  #
  # path    - Path of the CSV file to read.
  # options - Options forwarded to CSV.read, plus :thread_count (default 10).
  # block   - Called as block.call(item, mutex); see .process for details.
  #
  # Returns the parsed CSV with the mapped values written into its rows.
  # Raises ArgumentError if no block is given.
  def self.map(path, options={}, &block)
    raise ArgumentError, 'tried to call PCSV.map without a block' unless block
    process(:map, path, options, &block)
  end

  # Performs a given action on each cell of a CSV file.
  #
  # action  - :map writes the block result back into the cell; any other value
  #           (e.g. :each) only invokes the block.
  # path    - Path of the CSV file to read.
  # options - Options forwarded to CSV.read. The :thread_count key (default 10)
  #           is consumed here and not forwarded. The caller's hash is not
  #           modified.
  #
  # The block receives two arguments:
  #   item  - Hash with :row_index, :col_index, :row (the row object) and
  #           :value (the cell's current value).
  #   mutex - The Mutex shared by all workers, for guarding shared state.
  #
  # A StandardError raised by the block is reported with `warn` and the cell is
  # left untouched; processing of the remaining cells continues.
  #
  # Returns the parsed CSV object.
  def self.process(action, path, options={})
    # Work on a copy so the caller's hash keeps its :thread_count entry.
    options = options.dup
    thread_count = options.delete(:thread_count) || 10

    # Open CSV & build a worker queue. The double splat is required because
    # CSV.read only accepts keyword options on Ruby 3.
    csv = CSV.read(path, **options)
    queue = []
    csv.each_with_index do |row, row_index|
      # Rows are CSV::Row objects when headers are enabled and plain Arrays
      # otherwise; only the former responds to #fields.
      fields = row.respond_to?(:fields) ? row.fields : row
      fields.each_with_index do |field, col_index|
        queue << {
          row_index: row_index,
          col_index: col_index,
          row: row,
          value: field
        }
      end
    end

    # Launch threads and iterate over queue until it's done.
    mutex = Mutex.new
    threads = Array.new(thread_count) do
      Thread.new do
        loop do
          # Grab an item from the front of the queue.
          item = mutex.synchronize { queue.shift }
          break if item.nil?

          # Invoke the block with the row info.
          begin
            result = yield item, mutex

            if action == :map
              # Rows are shared between cells, so writes are serialized.
              mutex.synchronize { item[:row][item[:col_index]] = result }
            end
          rescue StandardError => e
            warn("[ERROR] #{e.message} [R#{item[:row_index]},C#{item[:col_index]}]")
          end
        end
      end
    end

    threads.each(&:join)

    csv
  end
end
|
data/lib/pcsv/version.rb
ADDED
data/test/pcsv_test.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class TestPCSV < MiniTest::Unit::TestCase
  ######################################
  # Each
  ######################################

  # Every cell value handed to the block is recorded; the expected set is the
  # integers 0 through 49.
  def test_each
    seen = {}
    PCSV.each('fixtures/simple.csv', :headers => true) do |item, mutex|
      mutex.synchronize { seen[item[:value].to_i] = true }
    end

    assert_equal((0...50).to_a, seen.keys.sort)
  end


  ######################################
  # Map
  ######################################

  # Adds 100 to every cell and compares the output with the stored fixture.
  def test_map
    mapped = PCSV.map('fixtures/simple.csv', :headers => true) do |item, _mutex|
      item[:value].to_i + 100
    end

    assert_equal IO.read('fixtures/simple.map.csv'), mapped.to_csv
  end

  # Same mapping, except the block raises for the cell holding '30'; the output
  # is compared with the error fixture.
  def test_map_error
    mapped = PCSV.map('fixtures/simple.csv', :headers => true) do |item, _mutex|
      raise 'OH NO!' if item[:value] == '30'

      item[:value].to_i + 100
    end

    assert_equal IO.read('fixtures/simple.map_error.csv'), mapped.to_csv
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'bundler/setup'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'mocha'
|
4
|
+
require 'unindentable'
|
5
|
+
require 'pcsv'
|
6
|
+
|
7
|
+
class MiniTest::Unit::TestCase
  # Asserts that a worksheet's cells match an expected grid.
  #
  # exp       - Expected rows (an enumerable of enumerables of cells).
  # worksheet - Object whose #each yields rows that respond to #map.
  # msg       - Optional failure message passed through to assert_equal.
  #
  # Both sides are rendered as text with each cell left-justified to ten
  # characters and trailing whitespace trimmed per row; actual cells are
  # additionally stripped first.
  def assert_worksheet exp, worksheet, msg = nil
    actual_rows = []
    worksheet.each do |row|
      actual_rows << row.map { |cell| cell.to_s.strip }
    end

    # Renders a grid as newline-separated rows of fixed-width columns.
    render = lambda do |rows|
      lines = rows.map do |row|
        row.map { |cell| '%-10s' % cell.to_s }.join('').rstrip
      end
      lines.join("\n")
    end

    expected_text = render.call(exp)
    actual_text = render.call(actual_rows)
    assert_equal(expected_text, actual_text, msg)
  end
end
|
18
|
+
|
metadata
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pcsv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ben Johnson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-24 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.9.2.2
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.9.2.2
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: minitest
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 3.5.0
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 3.5.0
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: mocha
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.12.5
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.12.5
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: unindentable
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.1.0
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: 0.1.0
|
78
|
+
description:
|
79
|
+
email:
|
80
|
+
- benbjohnson@yahoo.com
|
81
|
+
executables: []
|
82
|
+
extensions: []
|
83
|
+
extra_rdoc_files: []
|
84
|
+
files:
|
85
|
+
- lib/pcsv/version.rb
|
86
|
+
- lib/pcsv.rb
|
87
|
+
- README.md
|
88
|
+
- test/pcsv_test.rb
|
89
|
+
- test/test_helper.rb
|
90
|
+
homepage: http://github.com/benbjohnson/pcsv
|
91
|
+
licenses: []
|
92
|
+
post_install_message:
|
93
|
+
rdoc_options: []
|
94
|
+
require_paths:
|
95
|
+
- lib
|
96
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ! '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
requirements: []
|
109
|
+
rubyforge_project:
|
110
|
+
rubygems_version: 1.8.24
|
111
|
+
signing_key:
|
112
|
+
specification_version: 3
|
113
|
+
summary: A simple, parallel processing framework for CSV files.
|
114
|
+
test_files:
|
115
|
+
- test/pcsv_test.rb
|
116
|
+
- test/test_helper.rb
|