ascii-data-tools 0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70)
  1. data/.gitignore +3 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +40 -0
  6. data/LICENSE.GPL2 +339 -0
  7. data/README.rdoc +52 -0
  8. data/Rakefile +42 -0
  9. data/TODO +4 -0
  10. data/ascii-data-tools.gemspec +30 -0
  11. data/bin/ascii-data-cat +13 -0
  12. data/bin/ascii-data-edit +13 -0
  13. data/bin/ascii-data-norm +13 -0
  14. data/bin/ascii-data-qdiff +13 -0
  15. data/bin/ascii-data-tools-config +9 -0
  16. data/examples/big +10000 -0
  17. data/examples/built_in_records.gz +0 -0
  18. data/examples/slightly_modified_built_in_records.gz +0 -0
  19. data/features/ascii-data-cat.feature +110 -0
  20. data/features/ascii-data-edit.feature +91 -0
  21. data/features/ascii-data-qdiff.feature +54 -0
  22. data/features/encoding_decoding.feature +68 -0
  23. data/features/normaliser.feature +27 -0
  24. data/features/plugins.feature +73 -0
  25. data/features/record_recognition.feature +61 -0
  26. data/features/step_definitions/ascii-data-cat_steps.rb +48 -0
  27. data/features/step_definitions/ascii-data-edit_steps.rb +38 -0
  28. data/features/step_definitions/ascii-data-norm_steps.rb +7 -0
  29. data/features/step_definitions/ascii-data-qdiff_steps.rb +43 -0
  30. data/features/step_definitions/encoding_decoding_steps.rb +23 -0
  31. data/features/step_definitions/plugins_steps.rb +11 -0
  32. data/features/step_definitions/record_recognition_steps.rb +10 -0
  33. data/features/support/env.rb +5 -0
  34. data/lib/ascii-data-tools.rb +8 -0
  35. data/lib/ascii-data-tools/configuration.rb +169 -0
  36. data/lib/ascii-data-tools/configuration_printer.rb +38 -0
  37. data/lib/ascii-data-tools/controller.rb +123 -0
  38. data/lib/ascii-data-tools/discover.rb +19 -0
  39. data/lib/ascii-data-tools/external_programs.rb +23 -0
  40. data/lib/ascii-data-tools/filter.rb +148 -0
  41. data/lib/ascii-data-tools/filter/diffing.rb +139 -0
  42. data/lib/ascii-data-tools/formatting.rb +109 -0
  43. data/lib/ascii-data-tools/global_autodiscovery.rb +21 -0
  44. data/lib/ascii-data-tools/record.rb +50 -0
  45. data/lib/ascii-data-tools/record_type.rb +139 -0
  46. data/lib/ascii-data-tools/record_type/builder.rb +50 -0
  47. data/lib/ascii-data-tools/record_type/decoder.rb +77 -0
  48. data/lib/ascii-data-tools/record_type/encoder.rb +17 -0
  49. data/lib/ascii-data-tools/record_type/field.rb +168 -0
  50. data/lib/ascii-data-tools/record_type/normaliser.rb +38 -0
  51. data/lib/ascii-data-tools/ruby_extensions.rb +7 -0
  52. data/lib/ascii-data-tools/version.rb +3 -0
  53. data/spec/ascii-data-tools/configuration_printer_spec.rb +51 -0
  54. data/spec/ascii-data-tools/configuration_spec.rb +153 -0
  55. data/spec/ascii-data-tools/discover_spec.rb +8 -0
  56. data/spec/ascii-data-tools/filter/diffing_spec.rb +82 -0
  57. data/spec/ascii-data-tools/filter_spec.rb +107 -0
  58. data/spec/ascii-data-tools/formatting_spec.rb +106 -0
  59. data/spec/ascii-data-tools/record_spec.rb +49 -0
  60. data/spec/ascii-data-tools/record_type/builder_spec.rb +69 -0
  61. data/spec/ascii-data-tools/record_type/decoder_spec.rb +73 -0
  62. data/spec/ascii-data-tools/record_type/encoder_spec.rb +32 -0
  63. data/spec/ascii-data-tools/record_type/field_spec.rb +160 -0
  64. data/spec/ascii-data-tools/record_type/normaliser_spec.rb +25 -0
  65. data/spec/ascii-data-tools/record_type_spec.rb +175 -0
  66. data/spec/filter_helper.rb +24 -0
  67. data/spec/record_type_helpers.rb +8 -0
  68. data/spec/spec.opts +2 -0
  69. data/spec/spec_helper.rb +5 -0
  70. metadata +196 -0
@@ -0,0 +1,38 @@
module AsciiDataTools
  module RecordType
    module Normaliser
      # Mixin that masks the contents of "normalised" fields in an encoded,
      # fixed-length record.
      #
      # The including object must respond to +fields+, returning an ordered
      # Array whose elements respond to +length+ and +normalised?+.
      module Normaliser
        # Returns a copy of +encoded_record+ in which every character of each
        # normalised field is replaced by 'X'.
        #
        # The record is matched purely by field lengths; when it does not fit
        # the expected layout no substitution takes place and the text comes
        # back unchanged.
        def normalise(encoded_record)
          # The regexps depend only on the field layout, so build them once.
          @regexps_to_normalise_fields ||= make_regexps_to_normalise_fields
          fields_to_normalise.inject(encoded_record) do |normalised_string, field|
            mask = 'X' * field.length
            # \1 and \3 re-insert the untouched text before and after the field.
            normalised_string.gsub(@regexps_to_normalise_fields[field], '\1' + mask + '\3')
          end
        end

        protected

        # Builds a Hash mapping every normalised field to the regexp that
        # isolates it inside an encoded record.
        def make_regexps_to_normalise_fields
          fields_to_normalise.inject({}) do |regexps, field|
            regexps[field] = make_normalising_regexp_for(field)
            regexps
          end
        end

        # The subset of +fields+ that report themselves as normalised (memoised).
        def fields_to_normalise
          @fields_to_normalise ||= fields.select { |f| f.normalised? }
        end

        # Returns a regexp with three capture groups: everything before
        # +field+, +field+ itself, and everything after it. Each field is
        # matched only by its length.
        def make_normalising_regexp_for(field)
          position = fields.index(field)
          preceding_pattern = fields[0...position].map { |f| length_match_for(f) }.join
          following_pattern = fields[(position + 1)..-1].map { |f| length_match_for(f) }.join

          # MULTILINE lets '.' match a newline, so a field may contain one.
          Regexp.new("^(%s)(%s)(%s)$" % [preceding_pattern, length_match_for(field), following_pattern], Regexp::MULTILINE)
        end

        # Regexp fragment matching exactly +field.length+ arbitrary characters.
        def length_match_for(field)
          ".{#{field.length}}"
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
# Compatibility shim: provide Enumerable#enum_with_index on interpreters that
# do not define it. Feature detection is used instead of a RUBY_VERSION
# pattern so the shim is installed on every Ruby lacking the method and never
# replaces a native implementation.
unless Enumerable.method_defined?(:enum_with_index)
  module Enumerable
    # Returns an enumerator yielding each element together with its
    # zero-based index.
    def enum_with_index
      map.with_index
    end
  end
end
@@ -0,0 +1,3 @@
module AsciiDataTools
  # Version string of this library; frozen so it cannot be mutated in place.
  VERSION = "0.9".freeze
end
@@ -0,0 +1,51 @@
require 'spec_helper'
require 'ascii-data-tools/configuration_printer'

module AsciiDataTools
  # The printer is exercised against a mocked presenter that supplies fixed
  # headings and two summary rows.
  describe RecordTypesConfigurationPrinter do
    before do
      @presenter = mock(RecordTypesConfigurationPresenter,
        :headings              => ["type name", "total length", "constraints", "normalised fields"],
        :record_type_summaries => [["x", "y", "z", "w"], ["a", "b", "c", "d"]]
      )
    end

    it "should print out the headers from the presenter" do
      summary = RecordTypesConfigurationPrinter.new(@presenter).summary
      summary.should include("type name", "total length", "constraints", "normalised fields")
    end

    it "should print out the record type summaries" do
      summary = RecordTypesConfigurationPrinter.new(@presenter).summary
      summary.should include("x", "y", "z", "w", "a", "b", "c", "d")
    end
  end

  # The presenter turns record types into rows of
  # [type name, total length, constraints, normalised fields].
  describe RecordTypesConfigurationPresenter do
    include RecordTypeHelpers

    it "should provide headings" do
      expected_headings = ["type name", "total length", "constraints", "normalised fields"]
      RecordTypesConfigurationPresenter.new(nil).headings.should == expected_headings
    end

    it "should present every record type as a row" do
      rows = RecordTypesConfigurationPresenter.new([type("ABC"), type("DEF")]).record_type_summaries
      rows[0].should == ["ABC", 0, "", ""]
      rows[1].should == ["DEF", 0, "", ""]
    end

    it "should sort the record types by the total length" do
      longer_record_type  = type("longer") { field 'XYZ', :length => 5 }
      shorter_record_type = type("shorter") { field 'ABC', :length => 3 }

      # Deliberately supplied longest-first.
      rows = RecordTypesConfigurationPresenter.new([longer_record_type, shorter_record_type]).record_type_summaries
      rows[0].first.should == "shorter"
      rows[1].first.should == "longer"
    end

    it "should present the normalised fields" do
      abc_type = type("ABC") do
        field 'yyy', :length => 1
        field 'xxx', :length => 2, :normalised => true
      end

      rows = RecordTypesConfigurationPresenter.new([abc_type]).record_type_summaries
      rows[0].should == ["ABC", 3, "", "xxx"]
    end
  end
end
@@ -0,0 +1,153 @@
require 'spec_helper'
require 'stringio'

module AsciiDataTools
  # Specs for Configuration: option overrides, validation of the input
  # arguments and record type loading.
  describe Configuration do
    it "should allow overwriting the input source, output stream, record types and user feedback stream" do
      input_source = mock("input source")
      output_stream = mock("output stream")
      config = Configuration.new([], {:input_sources => [input_source],
                                      :output_stream => output_stream,
                                      :record_types => "record types",
                                      :user_feedback_stream => "user feedback stream"})
      config.output_stream.should == output_stream
      config.input_sources.should == [input_source]
      config.record_types.should == "record types"
      config.user_feedback_stream.should == "user feedback stream"
    end

    it "should not be valid unless the input stream is specified" do
      config = Configuration.new([], :record_types => "record types")
      config.should_not be_valid
      config.errors.should include("No input specified.")
    end

    it "should accept existing flat files as input" do
      File.stub!(:exists?).with("path/to/file").and_return(true)
      File.should_receive(:open).with("path/to/file").and_return(mock(IO))

      config = Configuration.new(["path/to/file"], :record_types => "record types")
      config.should be_valid
    end

    it "should reject non-existing flat files as input" do
      File.stub!(:exists?).with("path/to/file").and_return(false)
      config = Configuration.new(["path/to/file"], :record_types => "record types")
      config.should_not be_valid
      config.errors.should include("File path/to/file does not exist!")
    end

    it "should exit when passed invalid options" do
      config = Configuration.new(["-xxx"], :record_types => "record types")
      config.should_not be_valid
      config.errors.should include("invalid option: -xxx")
    end

    it "should load record types using autodiscovery by default" do
      AsciiDataTools.should_receive(:autodiscover).once
      AsciiDataTools.stub!(:record_types).and_return("record types")
      Configuration.new([]).record_types.should == "record types"
    end

    it "should use the override for record types if specified" do
      AsciiDataTools.should_receive(:autodiscover).exactly(0).times
      Configuration.new([], :record_types => "overriden record types").record_types.should == "overriden record types"
    end
  end

  # Specs for InputSourceFactory: turning command-line arguments into input
  # sources (STDIN, plain files, gzipped files).
  describe InputSourceFactory do
    it "should use STDIN as the stream when - is the input argument" do
      source_from(["-"]).stream.should == STDIN
    end

    it "should raise an error if the path specified in the input argument does not exist" do
      lambda { source_from(["path/to/non-existent-file"]) }.should raise_error(/does not exist/)
    end

    it "should raise an error if the input parameters are empty" do
      lambda { source_from([]) }.should raise_error(/No input specified/)
    end

    it "should raise an error if the wrong number of input parameters is specified" do
      lambda { source_from(["x", "y"]) }.should raise_error(/2 input sources detected/i)
    end

    it "should process multiple input sources if so configured" do
      File.stub!(:exists?).with("path/to/file1").and_return(true)
      File.should_receive(:open).with("path/to/file1").and_return("IO stream 1")
      File.stub!(:exists?).with("path/to/file2").and_return(true)
      File.should_receive(:open).with("path/to/file2").and_return("IO stream 2")

      factory = InputSourceFactory.new(:expected_argument_number => 2)
      sources = factory.input_sources_from ["path/to/file1", "path/to/file2"]
      sources[0].stream.should == "IO stream 1"
      sources[1].stream.should == "IO stream 2"
    end

    it "should reject the input pipe as an argument if so configured" do
      # Parentheses keep the regexp literal from being parsed ambiguously and
      # match the other raise_error calls in this file.
      lambda { InputSourceFactory.new(:input_pipe_accepted => false).input_sources_from(["-"]) }.should raise_error(/STDIN/)
    end

    it "should open the file normally if the path specified in the input argument exists and the file is not gzipped" do
      File.stub!(:exists?).with("path/to/file").and_return(true)
      File.should_receive(:open).with("path/to/file").and_return("IO stream")

      source_from(["path/to/file"]).stream.should == "IO stream"
    end

    it "should open the file as a gzip read stream if the path specified in the input argument exists and the file is gzipped" do
      File.stub!(:exists?).with("path/to/file.gz").and_return(true)
      Zlib::GzipReader.should_receive(:open).with("path/to/file.gz").and_return("IO stream")

      source_from(["path/to/file.gz"]).stream.should == "IO stream"
    end

    # Builds the single input source for +args+ using a factory that expects
    # exactly one argument and accepts the input pipe.
    def source_from(args)
      InputSourceFactory.new(:expected_argument_number => 1, :input_pipe_accepted => true).input_sources_from(args).first
    end
  end

  # Specs for Editor: buffers are written to files, handed to the given block
  # on #edit, and checked for changes afterwards.
  describe Editor do
    it "should write input streams to files" do
      result_aggregator = ""
      editor = Editor.new do |filenames|
        result_aggregator = filenames.inject(result_aggregator) {|agg, f| agg + File.read(f) }
      end
      editor[0] << "file1 "
      editor[1] << "file2 "
      editor[2] << "file3"

      editor.edit

      result_aggregator.should == "file1 file2 file3"
    end

    it "should detect when no changes were made during editing" do
      editor = Editor.new do |filenames| end
      editor[0] << "hello"
      editor.edit
      editor.changed?(0).should be_false
    end

    it "should detect when a change was made during editing" do
      now = Time.new
      # Two successive mtime calls report different times, simulating an edit.
      File.should_receive(:mtime).and_return(now, now+1)

      editor = Editor.new do |filenames| end
      editor[0] << "hello"
      editor.edit
      editor.changed?(0).should be_true
    end
  end

  describe InputSource do
    it "should read a line from the input stream when prompted to read and should know when it's full or empty" do
      source = InputSource.new("some file", StringIO.new("abc\ndef\n"))

      source.should have_records
      source.read.should == "abc\n"
      source.read.should == "def\n"
      source.should_not have_records
    end
  end
end
@@ -0,0 +1,8 @@
require 'spec_helper'

# Requiring the discover file is expected to register the EXAMPLE01 record type.
describe "the default configuration" do
  it "should add the EXAMPLE01 type to the configuration" do
    require 'ascii-data-tools/discover'

    example_type = AsciiDataTools.record_types.find_by_name("EXAMPLE01")
    example_type.should_not be_nil
  end
end
@@ -0,0 +1,82 @@
require 'spec_helper'
require 'filter_helper'

require 'ascii-data-tools/configuration'
require 'ascii-data-tools/filter'
require 'stringio'

module AsciiDataTools
  module Filter
    module Diffing
      # DiffExecutingFilter takes a pair of input sources from upstream and
      # emits their diff, or raises when there is nothing to report.
      describe DiffExecutingFilter do
        it "should return the diff if the inputs are not the same" do
          should output("2a3\n> xyz\n").from_upstream([input_source_containing("abc\ndef\n"), input_source_containing("abc\ndef\nxyz\n")])
        end

        it "should raise an exception when the streams are the same" do
          filter = DiffExecutingFilter.new
          filter << [input_source_containing("abc\ndef\n"), input_source_containing("abc\ndef\n")]
          lambda { filter.write(StringIO.new) }.should raise_error(StreamsEqualException)
        end
      end

      # DiffParsingFilter reads diff text (change "c", addition "a" and
      # deletion "d" hunks) and yields one Difference per hunk.
      describe DiffParsingFilter do
        it "should sieve the diffs into left and right lines" do
          filter = DiffParsingFilter.new
          filter << input_source_containing("4c4,5\n< abc\n---\n> def\n> ghi\n")
          difference = filter.read
          difference.left_contents.should == ["abc\n"]
          difference.right_contents.should == ["def\n", "ghi\n"]
        end

        context "for conflicts" do
          it "should detect a one-line difference" do
            filter = DiffParsingFilter.new
            filter << input_source_containing("4c4\n< abc\n---\n> def\n")
            filter.read.should be_a(Difference)
            filter.should_not have_records
          end

          it "should detect a multi-line difference" do
            filter = DiffParsingFilter.new
            filter << input_source_containing("1,2c1,3\n< abc\n< def\n---\n> ghi\n> jkl\n> mno\n")
            filter.read.should be_a(Difference)
            filter.should_not have_records
          end
        end

        context "for additions" do
          it "should detect a one-line difference" do
            filter = DiffParsingFilter.new
            filter << input_source_containing("1a2\n> def\n")
            filter.read.should be_a(Difference)
            filter.should_not have_records
          end

          it "should detect a multi-line difference" do
            filter = DiffParsingFilter.new
            filter << input_source_containing("1a2,3\n> def\n> xyz\n")
            filter.read.should be_a(Difference)
            filter.should_not have_records
          end
        end

        context "for deletions" do
          it "should detect a one-line difference" do
            filter = DiffParsingFilter.new
            filter << input_source_containing("1d2\n< def\n")
            filter.read.should be_a(Difference)
            filter.should_not have_records
          end

          it "should detect a multi-line difference" do
            filter = DiffParsingFilter.new
            # Three deleted lines, each introduced by a plain "< " marker.
            filter << input_source_containing("1,3d2\n< def\n< xyz\n< wuv\n")
            filter.read.should be_a(Difference)
            filter.should_not have_records
          end
        end
      end
    end
  end
end
@@ -0,0 +1,107 @@
require 'spec_helper'
require 'filter_helper'

require 'ascii-data-tools/configuration'
require 'ascii-data-tools/filter'
require 'ascii-data-tools/record_type'
require 'stringio'

module AsciiDataTools
  module Filter
    # A Filter transforms each record read from its upstream with the block
    # it was constructed with.
    describe Filter do
      it "should read from 'upstream' and filter when reading" do
        filter = Filter.new do |record|
          record.strip.reverse + "\n"
        end
        filter << mock("upstream object", :read => "abc\n")

        filter.read.should == "cba\n"
      end

      it "should read from upstream and write to given output" do
        Filter.new do |record|
          record.strip.reverse + "\n"
        end.should output("cba\nfed\n").from_upstream("abc\ndef\n")
      end

      it "should be chainable" do
        f1 = Filter.new {|r| r.gsub(/\d/, "X") }
        f2 = Filter.new {|r| r.count("X").to_s }
        f3 = Filter.new {|r| r }

        f3 << (f2 << (f1 << input_source_containing("ab1cd2")))
        f3.read.should == "2"
      end
    end

    # A BufferingFilter hands its block the whole upstream as a tempfile.
    describe BufferingFilter do
      it "should buffer the upstream into a tempfile before the first read and then return it" do
        BufferingFilter.new do |buffered_upstream_as_tempfile|
          buffered_upstream_as_tempfile
        end.should output("abc\ndef\n").from_upstream("abc\ndef\n")
      end

      it "should be chainable" do
        first_filter = BufferingFilter.new do |tempfile|
          StringIO.new(tempfile.readlines.map {|s| s.upcase}.join(""))
        end
        BufferingFilter.new do |tempfile|
          StringIO.new(tempfile.readlines.map {|s| s.strip + "n" + "\n" }.join(""))
        end.should output("ABCn\nDEFn\n").from_upstream(first_filter, "abc\ndef\n")
      end
    end

    describe SortingFilter do
      it "should sort the given stream" do
        should output("abc\ndef\nxyz\n").from_upstream("xyz\nabc\ndef\n")
      end
    end

    # Human-readable (decoded) form of one record of type ABC; the third
    # field holds an escaped newline.
    DECODED_FIXED_LENGTH_RECORD = <<STR
Record 01 (ABC)
01 field1 : [12345]-----
02 field10 : [abc]-------
03 field3 : [\\n]--------

STR

    # Decoded form of two consecutive records of unknown type.
    SEVERAL_FIXED_LENGTH_RECORDS = <<STR
Record 01 (unknown)
01 UNKNOWN : [12345]-----

Record 02 (unknown)
01 UNKNOWN : [abc]-----

STR

    # ParsingFilter reads decoded record text and rebuilds Record objects,
    # looking the record type up by the name found in the text.
    describe ParsingFilter do
      include RecordTypeHelpers
      include AsciiDataTools::Record
      include AsciiDataTools::RecordType

      it "should identify a decoded record and encode it" do
        # Field names mirror those in DECODED_FIXED_LENGTH_RECORD.
        abc_type = type("ABC") do
          field 'field1', :length => 5
          field 'field10', :length => 3
          field 'field3', :length => 1
        end
        record_types = mock(AsciiDataTools::RecordType::RecordTypeRepository)
        record_types.should_receive(:find_by_name).with("ABC").and_return(abc_type)

        filter = ParsingFilter.new(record_types)
        filter << input_source_containing(DECODED_FIXED_LENGTH_RECORD)
        filter.read.should == AsciiDataTools::Record::Record.new(abc_type, ["12345", "abc", "\n"])
      end

      it "should identify several decoded records and encode them" do
        unknown_type = AsciiDataTools::RecordType::UnknownType.new
        record_types = mock(AsciiDataTools::RecordType::RecordTypeRepository)
        record_types.should_receive(:find_by_name).with("unknown").twice.and_return(unknown_type)

        filter = ParsingFilter.new(record_types)
        filter << input_source_containing(SEVERAL_FIXED_LENGTH_RECORDS)
        filter.read.should == AsciiDataTools::Record::Record.new(unknown_type, ["12345"])
        filter.read.should == AsciiDataTools::Record::Record.new(unknown_type, ["abc"])
      end
    end
  end
end