csv_patch 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +57 -0
- data/Rakefile +31 -0
- data/VERSION +1 -0
- data/bin/csv_patch +35 -0
- data/csv_patch.gemspec +50 -0
- data/lib/csv_patch.rb +22 -0
- data/lib/csv_patch/compression.rb +44 -0
- data/lib/csv_patch/operation.rb +15 -0
- data/lib/csv_patch/patch.rb +44 -0
- data/lib/csv_patch/revision.rb +92 -0
- data/lib/csv_patch/stream_batch.rb +36 -0
- metadata +83 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YmE4OGVkYThmNGU0NzZiMzdkYWQxYjUzYjc2YzY0OTQzYWU0OTlhNg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZWYyZTdkMzc2OTVmYjU2NGFmZmNiMzlkNjA4NGI1ODI1MWUxMWNiMQ==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
OGM1YWFjZWFmNWNhYzA1ODVjMjMzNTQ0MGYxNzhlOWJlNWUzMTI3ZTg3OTc2
|
10
|
+
NDdlMGIyNzc2Y2I1ZmQwOGQ0MTJlZDQwZjEzN2ZmNDZjYTNlODU1M2NkZGFj
|
11
|
+
MTY4ODk5MmM0N2ExMzNhMjU4ODg0NzdlMzVhNjNmYzVlNTliNzU=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YjE5ZTc5NjYxNjIxZTAzZTk5NzFkOWM4YmZlZDEyNTU5MDBmYWEyYWU0YmZh
|
14
|
+
OTZhNDllNWFhYWNjODkwMzMyNDk2YWQ5Mjg4ZThlZmRkZjE2ZDBiY2IyYWFi
|
15
|
+
ZDA2MWRlMTExZmVmNTA4OGIyODMxOTUwNmM3YmEwNDBkNWIzOGQ=
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
addressable (2.3.6)
|
5
|
+
builder (3.2.2)
|
6
|
+
descendants_tracker (0.0.3)
|
7
|
+
faraday (0.9.0)
|
8
|
+
multipart-post (>= 1.2, < 3)
|
9
|
+
git (1.2.6)
|
10
|
+
github_api (0.11.1)
|
11
|
+
addressable (~> 2.3)
|
12
|
+
descendants_tracker (~> 0.0.1)
|
13
|
+
faraday (~> 0.8, < 0.10)
|
14
|
+
hashie (>= 1.2)
|
15
|
+
multi_json (>= 1.7.5, < 2.0)
|
16
|
+
nokogiri (~> 1.6.0)
|
17
|
+
oauth2
|
18
|
+
hashie (2.0.5)
|
19
|
+
highline (1.6.20)
|
20
|
+
jeweler (2.0.0)
|
21
|
+
builder
|
22
|
+
bundler (>= 1.0)
|
23
|
+
git (>= 1.2.5)
|
24
|
+
github_api
|
25
|
+
highline (>= 1.6.15)
|
26
|
+
nokogiri (>= 1.5.10)
|
27
|
+
rake
|
28
|
+
rdoc
|
29
|
+
json (1.8.1)
|
30
|
+
jwt (0.1.11)
|
31
|
+
multi_json (>= 1.5)
|
32
|
+
metaclass (0.0.4)
|
33
|
+
mini_portile (0.5.2)
|
34
|
+
mocha (1.1.0)
|
35
|
+
metaclass (~> 0.0.1)
|
36
|
+
multi_json (1.10.1)
|
37
|
+
multi_xml (0.5.5)
|
38
|
+
multipart-post (2.0.0)
|
39
|
+
nokogiri (1.6.1)
|
40
|
+
mini_portile (~> 0.5.0)
|
41
|
+
oauth2 (0.9.3)
|
42
|
+
faraday (>= 0.8, < 0.10)
|
43
|
+
jwt (~> 0.1.8)
|
44
|
+
multi_json (~> 1.3)
|
45
|
+
multi_xml (~> 0.5)
|
46
|
+
rack (~> 1.2)
|
47
|
+
rack (1.5.2)
|
48
|
+
rake (10.3.2)
|
49
|
+
rdoc (4.1.1)
|
50
|
+
json (~> 1.4)
|
51
|
+
|
52
|
+
PLATFORMS
|
53
|
+
ruby
|
54
|
+
|
55
|
+
DEPENDENCIES
|
56
|
+
jeweler
|
57
|
+
mocha
|
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'jeweler'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'rdoc/task'
|
6
|
+
|
7
|
+
# Packaging metadata for the gem; the generated csv_patch.gemspec is produced
# from these values (via `rake gemspec`).
Jeweler::Tasks.new do |spec|
  spec.name = 'csv_patch'
  spec.summary = 'Apply diffs to a CSV file'
  spec.description = 'Applies a list of changes in a given format to a CSV file'
  spec.email = 'lyudmilangelov@gmail.com'
  spec.authors = %w[Lyudmil]
  # Keep the test suite and dot-files out of the packaged file list.
  spec.files.exclude('test/**/*', '.*')

  spec.executables = %w[csv_patch]
end
|
17
|
+
|
18
|
+
# Defines the `test` task: runs every file matching test/**/*_test.rb with
# both lib/ and test/ added to the load path.
Rake::TestTask.new(:test) do |task|
  task.libs.push('lib', 'test')
  task.pattern = 'test/**/*_test.rb'
  task.verbose = true
end
|
23
|
+
|
24
|
+
# Defines the rdoc tasks. Documentation is generated into rdoc/ from the
# README and everything under lib/, and is titled with the gem version read
# from the VERSION file when that file exists.
Rake::RDocTask.new do |rdoc|
  # Strip surrounding whitespace (e.g. a trailing newline) so it does not end
  # up inside the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ''

  rdoc.rdoc_dir = 'rdoc'
  # Double quotes are required here: a single-quoted string would leave the
  # literal text "#{version}" in the title instead of interpolating it.
  rdoc.title = "csv_patch #{version}".strip
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
1.0.1
|
data/bin/csv_patch
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'csv_patch'
|
5
|
+
|
6
|
+
# Command-line entry point: parses the options, opens the input, output and
# changes files, and hands them to CsvPatch.patch.
options = {}

parser = OptionParser.new do |opts|
  opts.banner = 'Usage: csv_patch [options]'

  opts.on('-iINPUT_FILE', '--input=INPUT_FILE', 'CSV file to patch (required)') do |input_file|
    options[:input] = File.new(input_file, 'r')
  end

  opts.on('-oOUTPUT_FILE', '--output=OUTPUT_FILE', 'File to store the patched version in (required)') do |output_file|
    options[:output] = File.new(output_file, 'w')
  end

  opts.on('-cCHANGES_FILE', '--changes=CHANGES_FILE', 'File containing the changes to apply (required)') do |changes_file|
    options[:changes] = File.new(changes_file, 'r')
  end

  opts.on('-bBATCH_SIZE', '--batch_size=BATCH_SIZE', Integer, 'Number of changes to read before applying a patch') do |batch_size|
    # A batch size of zero (or less) would never consume the changes file,
    # so reject it up front instead of looping forever.
    raise OptionParser::InvalidArgument, 'must be a positive integer' unless batch_size > 0

    options[:batch_size] = batch_size
  end
end

begin
  parser.parse!

  # Fail with a usage message rather than a NoMethodError on nil later on.
  missing = [:input, :output, :changes].reject { |name| options[name] }
  unless missing.empty?
    abort "Missing required option(s): #{missing.map { |name| "--#{name}" }.join(', ')}\n\n#{parser}"
  end

  print 'Patching... '

  CsvPatch.patch(options)

  puts 'done'
rescue OptionParser::ParseError => error
  abort "#{error.message}\n\n#{parser}"
ensure
  # Runs on success, on errors and on abort, so no file handle is left open.
  [:input, :output, :changes].each { |file| options[file].close if options[file] }
end
|
data/csv_patch.gemspec
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: csv_patch 1.0.1 ruby lib
|
6
|
+
|
7
|
+
Gem::Specification.new do |s|
|
8
|
+
s.name = "csv_patch"
|
9
|
+
s.version = "1.0.1"
|
10
|
+
|
11
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib"]
|
13
|
+
s.authors = ["Lyudmil"]
|
14
|
+
s.date = "2015-10-27"
|
15
|
+
s.description = "Applies a list of changes in a given format to a CSV file"
|
16
|
+
s.email = "lyudmilangelov@gmail.com"
|
17
|
+
s.executables = ["csv_patch"]
|
18
|
+
s.files = [
|
19
|
+
"Gemfile",
|
20
|
+
"Gemfile.lock",
|
21
|
+
"Rakefile",
|
22
|
+
"VERSION",
|
23
|
+
"bin/csv_patch",
|
24
|
+
"csv_patch.gemspec",
|
25
|
+
"lib/csv_patch.rb",
|
26
|
+
"lib/csv_patch/compression.rb",
|
27
|
+
"lib/csv_patch/operation.rb",
|
28
|
+
"lib/csv_patch/patch.rb",
|
29
|
+
"lib/csv_patch/revision.rb",
|
30
|
+
"lib/csv_patch/stream_batch.rb"
|
31
|
+
]
|
32
|
+
s.rubygems_version = "2.2.2"
|
33
|
+
s.summary = "Apply diffs to a CSV file"
|
34
|
+
|
35
|
+
if s.respond_to? :specification_version then
|
36
|
+
s.specification_version = 4
|
37
|
+
|
38
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
39
|
+
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
40
|
+
s.add_development_dependency(%q<mocha>, [">= 0"])
|
41
|
+
else
|
42
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
43
|
+
s.add_dependency(%q<mocha>, [">= 0"])
|
44
|
+
end
|
45
|
+
else
|
46
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
47
|
+
s.add_dependency(%q<mocha>, [">= 0"])
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
data/lib/csv_patch.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'csv_patch/stream_batch'
|
2
|
+
require 'csv_patch/patch'
|
3
|
+
|
4
|
+
# Entry point of the gem: reads changes in batches and applies every batch
# as a Patch.
module CsvPatch

  # Number of changes per batch when options[:batch_size] is not given.
  DEFAULT_BATCH_SIZE = 500

  # Applies all changes read from options[:changes].
  #
  # options - Hash with the keys:
  #           :input      - stream passed to each Patch as its input
  #           :output     - stream passed to each Patch as its output
  #           :changes    - stream wrapped in a StreamBatch
  #           :batch_size - optional batch size (defaults to DEFAULT_BATCH_SIZE)
  #
  # NOTE(review): every batch is given the same :input and :output objects;
  # confirm the intended behaviour when the changes span more than one batch.
  def self.patch(options)
    batches(options).each { |changes| apply_patch(options, changes) }
  end

  # Wraps the changes stream so it can be consumed batch by batch.
  def self.batches(options)
    StreamBatch.new(options[:changes], options[:batch_size] || DEFAULT_BATCH_SIZE)
  end

  # Applies a single batch of changes.
  def self.apply_patch(options, changes)
    Patch.new(input: options[:input], output: options[:output], changes: changes).apply
  end

  # A bare `private` does not affect methods defined with `def self.`, so the
  # helpers are hidden explicitly.
  private_class_method :batches, :apply_patch

end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'csv_patch/operation'
|
2
|
+
|
3
|
+
module CsvPatch

  # Copies CSV rows from an input stream to an output stream, leaving out the
  # columns whose indexes are listed as empty in the column metadata.
  # csv_values and csv_line are not defined here; presumably they come from
  # Operation (not visible in this file).
  class Compression < Operation

    # input_stream    - stream of CSV lines to read
    # output_stream   - stream that receives the header row and the data rows
    # column_metadata - Hash with :columns (header values) and :empty_columns
    #                   (indexes of the columns to drop)
    def initialize(input_stream, output_stream, column_metadata)
      @input_stream, @output_stream = input_stream, output_stream

      @headers = column_metadata[:columns]
      @empty_columns = column_metadata[:empty_columns]
    end

    # Writes the reduced header row, then every remaining input line with the
    # same columns removed.
    def execute
      write_row without_empty_columns(@headers)

      until @input_stream.eof?
        line = @input_stream.gets
        write_row without_empty_columns(csv_values(line))
      end
    end

    private

    # Serializes the row and appends it to the output stream.
    def write_row(row)
      @output_stream.puts(csv_line(row))
    end

    # Returns the values of row whose position is not an empty-column index.
    def without_empty_columns(row)
      kept = row.each_with_index.reject { |_value, position| @empty_columns.include?(position) }
      kept.map(&:first)
    end

  end

end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'csv_patch/revision'
|
3
|
+
require 'csv_patch/compression'
|
4
|
+
|
5
|
+
module CsvPatch

  # Applies one set of changes to a CSV stream in two passes: a Revision
  # writes the changed rows to a temporary file, then a Compression copies
  # that file to the output using the column metadata the Revision collected.
  class Patch

    # Basename used for the intermediate temporary file.
    TEMPFILE_NAME = 'csv_patch'

    # options - Hash with the keys:
    #           :input   - stream the original CSV is read from
    #           :output  - stream the final result is written to
    #           :changes - changes handed to the Revision
    def initialize(options)
      @input, @output = options[:input], options[:output]

      @revision_result = Tempfile.new(TEMPFILE_NAME)
      @revision = Revision.new(options[:changes], @revision_result)
    end

    # Runs both passes. The temporary file is closed and deleted afterwards,
    # even when one of the passes raises, so a Patch can be applied only once.
    #
    # Returns nil.
    def apply
      apply_changes
      compress
      nil
    ensure
      # close! closes and unlinks; a plain close would leave the file on disk
      # until the Tempfile object is garbage collected.
      @revision_result.close!
    end

    private

    # First pass: the first input line is treated as the header, every other
    # line is offered for replacement, then the remaining changes are added.
    def apply_changes
      @revision.header_line(@input.gets)
      @revision.replace_line(@input.gets) until @input.eof?
      @revision.add_new_lines
    end

    # Second pass: re-reads the revision result from the start.
    def compress
      @revision_result.rewind

      compression.execute
    end

    # Built lazily so it sees the column metadata as it is after the first pass.
    def compression
      Compression.new(@revision_result, @output, @revision.column_metadata)
    end

  end

end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'csv_patch/operation'
|
2
|
+
|
3
|
+
module CsvPatch

  # Rewrites CSV rows according to a set of changes keyed by the first value
  # of each row, and tracks which column indexes have held no value so far.
  # csv_values and csv_line are not defined here; presumably they come from
  # Operation (not visible in this file).
  class Revision < Operation

    EMPTY_LINE = "\n"

    # changes       - Hash-like object mapping a row id to the new row data
    #                 (itself keyed by column); a nil value drops the row
    # output_stream - stream the resulting lines are written to
    def initialize(changes, output_stream)
      @changes = changes
      @output_stream = output_stream

      # Start from the header of an empty line until a real one is supplied.
      header_line(EMPTY_LINE)
    end

    # Records the columns found in line and marks all of them as empty.
    # Does nothing when line is nil.
    def header_line(line)
      return unless line

      @columns = csv_values(line)
      @empty_columns = @columns.each_index.to_a
    end

    # Writes either the unchanged row or its replacement from the changes.
    def replace_line(line)
      write replacement_for(csv_values(line))
    end

    # Writes a row for every change that is still left in the changes.
    def add_new_lines
      @changes.values.each { |addition| write row_line_for(addition) }
    end

    # Returns the current columns and the indexes still considered empty.
    def column_metadata
      { columns: @columns, empty_columns: @empty_columns }
    end

    private

    # Appends line to the output stream; nil lines are skipped.
    def write(line)
      @output_stream.puts(line) unless line.nil?
    end

    # A row with a pending change consumes that change (it is removed from
    # the changes); any other row is passed through as it is.
    def replacement_for(row)
      id = row.first

      if @changes.has_key?(id)
        row_line_for(@changes.delete(id))
      else
        output_line(row)
      end
    end

    # Builds the output line for row_data, first appending any columns it
    # introduces to the known columns. Returns nil for nil row_data.
    def row_line_for(row_data)
      return if row_data.nil?

      @columns |= row_data.keys
      output_line(@columns.map { |column| row_data[column] })
    end

    # Keeps only the empty-column indexes that have no value in this row as
    # well (the array is updated in place), then serializes the values.
    def output_line(values)
      @empty_columns.keep_if { |index| values[index].nil? }
      csv_line(values)
    end

  end

end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Reads a stream holding one JSON object per line and yields the objects
# merged together into Hash batches.
class StreamBatch

  include Enumerable

  # stream     - object responding to each_line; every non-blank line must be
  #              a JSON object
  # batch_size - number of keys a batch must hold before it is yielded
  #
  # Raises ArgumentError unless batch_size is a positive Integer; a size of
  # zero would yield empty batches forever without consuming the stream.
  def initialize(stream, batch_size)
    unless batch_size.is_a?(Integer) && batch_size > 0
      raise ArgumentError, "batch_size must be a positive Integer, got #{batch_size.inspect}"
    end

    @stream = stream
    @batch_size = batch_size
  end

  # Yields each batch as a Hash. A batch is yielded once it holds at least
  # batch_size keys, or when the stream ends with changes still pending.
  #
  # Returns an Enumerator when no block is given, self otherwise.
  def each
    return enum_for(:each) unless block_given?

    batch = nil

    @stream.each_line do |line|
      # Blank lines (e.g. a trailing newline at the end of the file) are not
      # valid JSON documents, so skip them instead of failing in JSON.parse.
      next if line.strip.empty?

      batch ||= {}
      batch.merge!(JSON.parse(line))
      next if batch.size < @batch_size

      yield batch
      batch = nil
    end

    yield batch if batch
    self
  end

end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csv_patch
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Lyudmil
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-10-27 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: jeweler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ! '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mocha
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: Applies a list of changes in a given format to a CSV file
|
42
|
+
email: lyudmilangelov@gmail.com
|
43
|
+
executables:
|
44
|
+
- csv_patch
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- Gemfile
|
49
|
+
- Gemfile.lock
|
50
|
+
- Rakefile
|
51
|
+
- VERSION
|
52
|
+
- bin/csv_patch
|
53
|
+
- csv_patch.gemspec
|
54
|
+
- lib/csv_patch.rb
|
55
|
+
- lib/csv_patch/compression.rb
|
56
|
+
- lib/csv_patch/operation.rb
|
57
|
+
- lib/csv_patch/patch.rb
|
58
|
+
- lib/csv_patch/revision.rb
|
59
|
+
- lib/csv_patch/stream_batch.rb
|
60
|
+
homepage:
|
61
|
+
licenses: []
|
62
|
+
metadata: {}
|
63
|
+
post_install_message:
|
64
|
+
rdoc_options: []
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ! '>='
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
requirements: []
|
78
|
+
rubyforge_project:
|
79
|
+
rubygems_version: 2.2.2
|
80
|
+
signing_key:
|
81
|
+
specification_version: 4
|
82
|
+
summary: Apply diffs to a CSV file
|
83
|
+
test_files: []
|