csv_patch 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YmE4OGVkYThmNGU0NzZiMzdkYWQxYjUzYjc2YzY0OTQzYWU0OTlhNg==
5
+ data.tar.gz: !binary |-
6
+ ZWYyZTdkMzc2OTVmYjU2NGFmZmNiMzlkNjA4NGI1ODI1MWUxMWNiMQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ OGM1YWFjZWFmNWNhYzA1ODVjMjMzNTQ0MGYxNzhlOWJlNWUzMTI3ZTg3OTc2
10
+ NDdlMGIyNzc2Y2I1ZmQwOGQ0MTJlZDQwZjEzN2ZmNDZjYTNlODU1M2NkZGFj
11
+ MTY4ODk5MmM0N2ExMzNhMjU4ODg0NzdlMzVhNjNmYzVlNTliNzU=
12
+ data.tar.gz: !binary |-
13
+ YjE5ZTc5NjYxNjIxZTAzZTk5NzFkOWM4YmZlZDEyNTU5MDBmYWEyYWU0YmZh
14
+ OTZhNDllNWFhYWNjODkwMzMyNDk2YWQ5Mjg4ZThlZmRkZjE2ZDBiY2IyYWFi
15
+ ZDA2MWRlMTExZmVmNTA4OGIyODMxOTUwNmM3YmEwNDBkNWIzOGQ=
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ group :development, :test do
4
+ gem 'jeweler'
5
+ gem 'mocha'
6
+ end
@@ -0,0 +1,57 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ addressable (2.3.6)
5
+ builder (3.2.2)
6
+ descendants_tracker (0.0.3)
7
+ faraday (0.9.0)
8
+ multipart-post (>= 1.2, < 3)
9
+ git (1.2.6)
10
+ github_api (0.11.1)
11
+ addressable (~> 2.3)
12
+ descendants_tracker (~> 0.0.1)
13
+ faraday (~> 0.8, < 0.10)
14
+ hashie (>= 1.2)
15
+ multi_json (>= 1.7.5, < 2.0)
16
+ nokogiri (~> 1.6.0)
17
+ oauth2
18
+ hashie (2.0.5)
19
+ highline (1.6.20)
20
+ jeweler (2.0.0)
21
+ builder
22
+ bundler (>= 1.0)
23
+ git (>= 1.2.5)
24
+ github_api
25
+ highline (>= 1.6.15)
26
+ nokogiri (>= 1.5.10)
27
+ rake
28
+ rdoc
29
+ json (1.8.1)
30
+ jwt (0.1.11)
31
+ multi_json (>= 1.5)
32
+ metaclass (0.0.4)
33
+ mini_portile (0.5.2)
34
+ mocha (1.1.0)
35
+ metaclass (~> 0.0.1)
36
+ multi_json (1.10.1)
37
+ multi_xml (0.5.5)
38
+ multipart-post (2.0.0)
39
+ nokogiri (1.6.1)
40
+ mini_portile (~> 0.5.0)
41
+ oauth2 (0.9.3)
42
+ faraday (>= 0.8, < 0.10)
43
+ jwt (~> 0.1.8)
44
+ multi_json (~> 1.3)
45
+ multi_xml (~> 0.5)
46
+ rack (~> 1.2)
47
+ rack (1.5.2)
48
+ rake (10.3.2)
49
+ rdoc (4.1.1)
50
+ json (~> 1.4)
51
+
52
+ PLATFORMS
53
+ ruby
54
+
55
+ DEPENDENCIES
56
+ jeweler
57
+ mocha
@@ -0,0 +1,31 @@
1
+ # encoding: utf-8
2
+
3
+ require 'jeweler'
4
+ require 'rake/testtask'
5
+ require 'rdoc/task'
6
+
7
# Gem packaging tasks supplied by Jeweler, configured with this gem's metadata.
Jeweler::Tasks.new do |gem|
  gem.name = 'csv_patch'
  gem.summary = 'Apply diffs to a CSV file'
  gem.description = 'Applies a list of changes in a given format to a CSV file'
  gem.email = 'lyudmilangelov@gmail.com'
  gem.authors = ['Lyudmil']
  # Tests and dotfiles are not shipped inside the packaged gem.
  gem.files.exclude 'test/**/*', '.*'

  gem.executables = ['csv_patch']
end

# `rake test` runs every file matching test/**/*_test.rb with lib/ and test/
# on the load path.
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# `rake rdoc` builds API documentation for the README and everything in lib/.
RDoc::Task.new do |rdoc|
  # VERSION ends with a newline; strip it so it does not leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ''

  rdoc.rdoc_dir = 'rdoc'
  # Double quotes are required here: a single-quoted string would put the
  # literal text "#{version}" into the title instead of the version number.
  rdoc.title = "csv_patch #{version}".strip
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.1
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'csv_patch'
5
+
6
# Command line front end for CsvPatch.patch.
#
# Paths are only collected while parsing; the files are opened after all
# options have been validated, so an invalid invocation never truncates an
# existing output file.
paths = {}
batch_size = nil

parser = OptionParser.new do |opts|
  opts.banner = 'Usage: csv_patch [options]'

  opts.on('-iINPUT_FILE', '--input=INPUT_FILE', 'CSV file to patch (required)') do |input_file|
    paths[:input] = input_file
  end

  opts.on('-oOUTPUT_FILE', '--output=OUTPUT_FILE', 'File to store the patched version in (required)') do |output_file|
    paths[:output] = output_file
  end

  opts.on('-cCHANGES_FILE', '--changes=CHANGES_FILE', 'File containing the changes to apply (required)') do |changes_file|
    paths[:changes] = changes_file
  end

  # Integer coercion rejects non-numeric values instead of silently turning
  # them into 0 the way String#to_i does.
  opts.on('-bBATCH_SIZE', '--batch_size=BATCH_SIZE', Integer,
          'Number of changes to read before applying a patch') do |size|
    batch_size = size
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  abort "#{e.message}\n#{parser.help}"
end

missing = [:input, :output, :changes].reject { |name| paths[name] }
unless missing.empty?
  flags = missing.map { |name| "--#{name}" }.join(', ')
  abort "Missing required option(s): #{flags}\n#{parser.help}"
end

abort "Batch size must be a positive integer\n#{parser.help}" if batch_size && batch_size < 1

options = {}
options[:batch_size] = batch_size if batch_size

begin
  options[:input] = File.new(paths[:input], 'r')
  options[:changes] = File.new(paths[:changes], 'r')
  # Opened last: if either file above is missing, the output is left untouched.
  options[:output] = File.new(paths[:output], 'w')

  print 'Patching... '

  CsvPatch.patch(options)

  puts 'done'
ensure
  # Close whatever was opened, even when patching raised.
  [:input, :output, :changes].each do |name|
    file = options[name]
    file.close if file && !file.closed?
  end
end
@@ -0,0 +1,50 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+ # stub: csv_patch 1.0.1 ruby lib
6
+
7
+ Gem::Specification.new do |s|
8
+ s.name = "csv_patch"
9
+ s.version = "1.0.1"
10
+
11
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib"]
13
+ s.authors = ["Lyudmil"]
14
+ s.date = "2015-10-27"
15
+ s.description = "Applies a list of changes in a given format to a CSV file"
16
+ s.email = "lyudmilangelov@gmail.com"
17
+ s.executables = ["csv_patch"]
18
+ s.files = [
19
+ "Gemfile",
20
+ "Gemfile.lock",
21
+ "Rakefile",
22
+ "VERSION",
23
+ "bin/csv_patch",
24
+ "csv_patch.gemspec",
25
+ "lib/csv_patch.rb",
26
+ "lib/csv_patch/compression.rb",
27
+ "lib/csv_patch/operation.rb",
28
+ "lib/csv_patch/patch.rb",
29
+ "lib/csv_patch/revision.rb",
30
+ "lib/csv_patch/stream_batch.rb"
31
+ ]
32
+ s.rubygems_version = "2.2.2"
33
+ s.summary = "Apply diffs to a CSV file"
34
+
35
+ if s.respond_to? :specification_version then
36
+ s.specification_version = 4
37
+
38
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
39
+ s.add_development_dependency(%q<jeweler>, [">= 0"])
40
+ s.add_development_dependency(%q<mocha>, [">= 0"])
41
+ else
42
+ s.add_dependency(%q<jeweler>, [">= 0"])
43
+ s.add_dependency(%q<mocha>, [">= 0"])
44
+ end
45
+ else
46
+ s.add_dependency(%q<jeweler>, [">= 0"])
47
+ s.add_dependency(%q<mocha>, [">= 0"])
48
+ end
49
+ end
50
+
@@ -0,0 +1,22 @@
1
+ require 'csv_patch/stream_batch'
2
+ require 'csv_patch/patch'
3
+
4
require 'tempfile'

# Entry point of the gem: reads changes in batches and applies them to a CSV
# stream.
module CsvPatch

  DEFAULT_BATCH_SIZE = 500

  # Name prefix of the temporary files that carry the result of one batch
  # into the next one.
  STAGE_NAME = 'csv_patch_stage'

  # Applies every batch of changes read from options[:changes] to the CSV in
  # options[:input] and writes the final result to options[:output].
  #
  # options - Hash with the keys:
  #           :input      - readable stream holding the CSV to patch
  #           :output     - writable stream receiving the patched CSV
  #           :changes    - readable stream handed to StreamBatch
  #           :batch_size - optional number of changes per batch
  #                         (DEFAULT_BATCH_SIZE when absent)
  #
  # Each batch is applied to the result of the previous batch. Reusing the
  # original input for every batch does not work: the first batch reads it to
  # EOF, so later batches would see no rows at all. Intermediate results live
  # in temporary files that are removed before this method returns. Nothing
  # is written to :output when there are no changes.
  def self.patch options
    original = options[:input]
    current = original
    stage = nil

    batches(options).each do |changes|
      stage = Tempfile.new(STAGE_NAME)
      apply_patch(options.merge(input: current, output: stage), changes)
      stage.rewind

      discard(current, original)
      # The finished stage becomes the input of the next batch.
      current, stage = stage, nil
    end

    IO.copy_stream(current, options[:output]) unless current.equal?(original)
  ensure
    # `stage` is only non-nil here when a batch failed half way through.
    discard(stage, original)
    discard(current, original)
  end

  # Wraps the changes stream so that it yields one Hash of changes per batch.
  def self.batches options
    StreamBatch.new(options[:changes], options[:batch_size] || DEFAULT_BATCH_SIZE)
  end

  # Applies a single batch of changes from options[:input] to options[:output].
  def self.apply_patch options, changes
    Patch.new(input: options[:input], output: options[:output], changes: changes).apply
  end

  # Closes and deletes an intermediate stage; the caller's own input stream
  # (and nil) is left alone.
  def self.discard stream, original
    stream.close! unless stream.nil? || stream.equal?(original)
  end

  # A bare `private` does not affect methods defined with `def self.`, so the
  # helpers are hidden explicitly.
  private_class_method :batches, :apply_patch, :discard

end
@@ -0,0 +1,44 @@
1
+ require 'csv_patch/operation'
2
+
3
module CsvPatch

  # Copies CSV rows from one stream to another while dropping the columns
  # whose indices are listed as empty, and puts a header row in front.
  class Compression < Operation

    # input_stream    - stream of CSV data rows, read line by line
    # output_stream   - stream receiving the header row and the reduced rows
    # column_metadata - Hash with :columns (header names) and :empty_columns
    #                   (indices of the columns to drop)
    def initialize(input_stream, output_stream, column_metadata)
      @input_stream = input_stream
      @output_stream = output_stream

      @headers = column_metadata[:columns]
      @empty_columns = column_metadata[:empty_columns]
    end

    # Writes the reduced header row, then every input row with the same
    # columns removed.
    def execute
      write_row(without_empty_columns(@headers))

      until @input_stream.eof?
        parsed_row = csv_values(@input_stream.gets)
        write_row(without_empty_columns(parsed_row))
      end
    end

    private

    # Serializes one row as a CSV line on the output stream.
    def write_row(row)
      @output_stream.puts csv_line(row)
    end

    # Returns the values of row whose position is not an empty-column index.
    def without_empty_columns(row)
      row.each_with_index
         .reject { |_value, position| @empty_columns.include?(position) }
         .map(&:first)
    end

  end

end
@@ -0,0 +1,15 @@
1
+ require 'csv'
2
+
3
module CsvPatch
  # Base class providing CSV (de)serialization helpers to the operations
  # that inherit from it.
  class Operation

    # Parses a single line of CSV text into an Array of field values.
    def csv_values(line_of_csv)
      CSV.parse_line(line_of_csv)
    end

    # Serializes an Array of field values into one CSV line, including the
    # trailing row separator.
    def csv_line(values)
      CSV.generate_line(values)
    end

  end
end
@@ -0,0 +1,44 @@
1
+ require 'tempfile'
2
+ require 'csv_patch/revision'
3
+ require 'csv_patch/compression'
4
+
5
module CsvPatch

  # Applies one set of changes to a CSV stream in two passes: a Revision
  # writes the changed rows to a temporary file, then a Compression copies
  # them to the output using the column metadata gathered by the revision.
  class Patch

    TEMPFILE_NAME = 'csv_patch'

    # options - Hash with the keys:
    #           :input   - readable stream; the first line is passed to the
    #                      revision as the header line
    #           :output  - writable stream receiving the final result
    #           :changes - changes handed to Revision
    def initialize options
      @input, @output = options[:input], options[:output]

      @revision_result = Tempfile.new(TEMPFILE_NAME)
      @revision = Revision.new(options[:changes], @revision_result)
    end

    # Runs both passes. The temporary file is closed and deleted afterwards,
    # also when one of the passes raises, so no file is left behind in the
    # temp directory. A Patch can therefore be applied only once.
    def apply
      apply_changes
      compress
      nil
    ensure
      @revision_result.close!
    end

    private

    # First pass: feeds the header and every remaining input line to the
    # revision, then lets it append the rows that are new.
    def apply_changes
      @revision.header_line(@input.gets)
      @revision.replace_line(@input.gets) until @input.eof?
      @revision.add_new_lines
    end

    # Second pass: re-reads the revision result from the start and writes
    # the compressed rows to the output.
    def compress
      @revision_result.rewind

      compression.execute
    end

    def compression
      Compression.new(@revision_result, @output, @revision.column_metadata)
    end

  end
end
@@ -0,0 +1,92 @@
1
+ require 'csv_patch/operation'
2
+
3
module CsvPatch

  # Rewrites CSV rows according to a Hash of changes keyed by row id (the
  # first field of a row) and tracks which columns never received a value.
  class Revision < Operation

    EMPTY_LINE = "\n"

    # changes       - Hash mapping a row id to a Hash of column => value, or
    #                 to nil; entries are removed from it as they are used
    # output_stream - stream receiving the revised CSV lines
    def initialize(changes, output_stream)
      @changes = changes
      @output_stream = output_stream

      # Start from an empty schema until a real header line is supplied.
      header_line(EMPTY_LINE)
    end

    # Sets the column names from a CSV header line and marks every column
    # as empty. A nil line is ignored.
    def header_line(line)
      return unless line

      @columns = csv_values(line)
      @empty_columns = @columns.each_index.to_a
    end

    # Emits the row unchanged, or its replacement when a change exists for
    # the row's id. A change whose value is nil emits nothing for that row.
    def replace_line(line)
      row = csv_values(line)
      row_id = row.first

      revised = if @changes.key?(row_id)
                  build_row(@changes.delete(row_id))
                else
                  serialize(row)
                end

      write(revised)
    end

    # Emits a row for every change still left in the changes Hash.
    def add_new_lines
      @changes.values.each { |pending| write(build_row(pending)) }
    end

    # Returns the current column names and the indices of the columns for
    # which no emitted row has had a value so far.
    def column_metadata
      { columns: @columns, empty_columns: @empty_columns }
    end

    private

    # Writes a line to the output stream; nil is skipped.
    def write(line)
      @output_stream.puts(line) unless line.nil?
    end

    # Extends the schema with any keys of row_data not yet known, then
    # serializes its values in column order. Returns nil for nil row_data.
    def build_row(row_data)
      return if row_data.nil?

      @columns = (@columns + row_data.keys).uniq
      serialize(@columns.map { |column| row_data[column] })
    end

    # Removes from the empty-column list every index that holds a value in
    # this row, then returns the row as a CSV line.
    def serialize(values)
      @empty_columns.reject! { |index| !values[index].nil? }
      csv_line(values)
    end

  end
end
@@ -0,0 +1,36 @@
1
+ require 'json'
2
+
3
# Groups a stream of JSON lines into batches.
#
# Every non-blank line of the stream is parsed as JSON and merged into the
# current batch Hash; a batch is yielded once it holds batch_size keys or
# the stream is exhausted.
class StreamBatch
  include Enumerable

  # stream     - readable stream responding to gets and eof?
  # batch_size - maximum number of keys per batch; must be positive
  #
  # Raises ArgumentError for a non-positive batch size, which would otherwise
  # make #each yield empty batches forever without consuming the stream.
  def initialize stream, batch_size
    unless batch_size.is_a?(Numeric) && batch_size > 0
      raise ArgumentError, "batch_size must be a positive number, got #{batch_size.inspect}"
    end

    @stream = stream
    @batch_size = batch_size
  end

  # Yields each batch as a Hash. Returns an Enumerator when no block is
  # given. Batches that would be empty (only blank lines left) are skipped.
  def each
    return enum_for(:each) unless block_given?

    until stream_end?
      batch = next_batch
      yield batch unless batch.empty?
    end
  end

  private

  def next_batch
    batch = {}

    until batch_full?(batch)
      change = next_change
      batch.merge!(change) if change
    end

    batch
  end

  # Parses the next line of the stream; returns nil for a blank line so that
  # empty or trailing lines in the changes file do not abort the run.
  def next_change
    line = @stream.gets
    return if line.nil? || line.strip.empty?

    JSON.parse(line)
  end

  def batch_full? batch
    batch.size >= @batch_size || stream_end?
  end

  def stream_end?
    @stream.eof?
  end

end
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csv_patch
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Lyudmil
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-10-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: jeweler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: mocha
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Applies a list of changes in a given format to a CSV file
42
+ email: lyudmilangelov@gmail.com
43
+ executables:
44
+ - csv_patch
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - Gemfile
49
+ - Gemfile.lock
50
+ - Rakefile
51
+ - VERSION
52
+ - bin/csv_patch
53
+ - csv_patch.gemspec
54
+ - lib/csv_patch.rb
55
+ - lib/csv_patch/compression.rb
56
+ - lib/csv_patch/operation.rb
57
+ - lib/csv_patch/patch.rb
58
+ - lib/csv_patch/revision.rb
59
+ - lib/csv_patch/stream_batch.rb
60
+ homepage:
61
+ licenses: []
62
+ metadata: {}
63
+ post_install_message:
64
+ rdoc_options: []
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ! '>='
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project:
79
+ rubygems_version: 2.2.2
80
+ signing_key:
81
+ specification_version: 4
82
+ summary: Apply diffs to a CSV file
83
+ test_files: []