ascii-data-tools 0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. data/.gitignore +3 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +40 -0
  6. data/LICENSE.GPL2 +339 -0
  7. data/README.rdoc +52 -0
  8. data/Rakefile +42 -0
  9. data/TODO +4 -0
  10. data/ascii-data-tools.gemspec +30 -0
  11. data/bin/ascii-data-cat +13 -0
  12. data/bin/ascii-data-edit +13 -0
  13. data/bin/ascii-data-norm +13 -0
  14. data/bin/ascii-data-qdiff +13 -0
  15. data/bin/ascii-data-tools-config +9 -0
  16. data/examples/big +10000 -0
  17. data/examples/built_in_records.gz +0 -0
  18. data/examples/slightly_modified_built_in_records.gz +0 -0
  19. data/features/ascii-data-cat.feature +110 -0
  20. data/features/ascii-data-edit.feature +91 -0
  21. data/features/ascii-data-qdiff.feature +54 -0
  22. data/features/encoding_decoding.feature +68 -0
  23. data/features/normaliser.feature +27 -0
  24. data/features/plugins.feature +73 -0
  25. data/features/record_recognition.feature +61 -0
  26. data/features/step_definitions/ascii-data-cat_steps.rb +48 -0
  27. data/features/step_definitions/ascii-data-edit_steps.rb +38 -0
  28. data/features/step_definitions/ascii-data-norm_steps.rb +7 -0
  29. data/features/step_definitions/ascii-data-qdiff_steps.rb +43 -0
  30. data/features/step_definitions/encoding_decoding_steps.rb +23 -0
  31. data/features/step_definitions/plugins_steps.rb +11 -0
  32. data/features/step_definitions/record_recognition_steps.rb +10 -0
  33. data/features/support/env.rb +5 -0
  34. data/lib/ascii-data-tools.rb +8 -0
  35. data/lib/ascii-data-tools/configuration.rb +169 -0
  36. data/lib/ascii-data-tools/configuration_printer.rb +38 -0
  37. data/lib/ascii-data-tools/controller.rb +123 -0
  38. data/lib/ascii-data-tools/discover.rb +19 -0
  39. data/lib/ascii-data-tools/external_programs.rb +23 -0
  40. data/lib/ascii-data-tools/filter.rb +148 -0
  41. data/lib/ascii-data-tools/filter/diffing.rb +139 -0
  42. data/lib/ascii-data-tools/formatting.rb +109 -0
  43. data/lib/ascii-data-tools/global_autodiscovery.rb +21 -0
  44. data/lib/ascii-data-tools/record.rb +50 -0
  45. data/lib/ascii-data-tools/record_type.rb +139 -0
  46. data/lib/ascii-data-tools/record_type/builder.rb +50 -0
  47. data/lib/ascii-data-tools/record_type/decoder.rb +77 -0
  48. data/lib/ascii-data-tools/record_type/encoder.rb +17 -0
  49. data/lib/ascii-data-tools/record_type/field.rb +168 -0
  50. data/lib/ascii-data-tools/record_type/normaliser.rb +38 -0
  51. data/lib/ascii-data-tools/ruby_extensions.rb +7 -0
  52. data/lib/ascii-data-tools/version.rb +3 -0
  53. data/spec/ascii-data-tools/configuration_printer_spec.rb +51 -0
  54. data/spec/ascii-data-tools/configuration_spec.rb +153 -0
  55. data/spec/ascii-data-tools/discover_spec.rb +8 -0
  56. data/spec/ascii-data-tools/filter/diffing_spec.rb +82 -0
  57. data/spec/ascii-data-tools/filter_spec.rb +107 -0
  58. data/spec/ascii-data-tools/formatting_spec.rb +106 -0
  59. data/spec/ascii-data-tools/record_spec.rb +49 -0
  60. data/spec/ascii-data-tools/record_type/builder_spec.rb +69 -0
  61. data/spec/ascii-data-tools/record_type/decoder_spec.rb +73 -0
  62. data/spec/ascii-data-tools/record_type/encoder_spec.rb +32 -0
  63. data/spec/ascii-data-tools/record_type/field_spec.rb +160 -0
  64. data/spec/ascii-data-tools/record_type/normaliser_spec.rb +25 -0
  65. data/spec/ascii-data-tools/record_type_spec.rb +175 -0
  66. data/spec/filter_helper.rb +24 -0
  67. data/spec/record_type_helpers.rb +8 -0
  68. data/spec/spec.opts +2 -0
  69. data/spec/spec_helper.rb +5 -0
  70. metadata +196 -0
@@ -0,0 +1,38 @@
1
module AsciiDataTools
  module RecordType
    module Normaliser
      # Mixin that masks the contents of selected fields in an encoded record.
      #
      # The including object must respond to +fields+, returning an ordered
      # list of objects that respond to +length+ and +normalised?+.
      module Normaliser
        # Replaces the characters of every field flagged by +normalised?+
        # with the same number of 'X' characters.
        #
        # encoded_record:: the encoded record string
        #
        # Returns the masked string. When a field's positional regexp does not
        # match the string, that field leaves the string untouched.
        def normalise(encoded_record)
          @regexps_to_normalise_fields ||= make_regexps_to_normalise_fields

          masked = encoded_record
          fields_to_normalise.each do |field|
            filler = 'X' * field.length
            masked = masked.gsub(@regexps_to_normalise_fields[field], '\1' + filler + '\3')
          end
          masked
        end

        protected

        # Builds a field => Regexp lookup covering every field to normalise.
        def make_regexps_to_normalise_fields
          lookup = {}
          fields_to_normalise.each do |field|
            lookup[field] = make_normalising_regexp_for(field)
          end
          lookup
        end

        # Memoised subset of +fields+ for which +normalised?+ is truthy.
        def fields_to_normalise
          @fields_to_normalise ||= fields.select { |candidate| candidate.normalised? }
        end

        # Builds a three-group regexp for +field+: group 1 spans every field
        # before it, group 2 the field itself, and group 3 every field after
        # it. MULTILINE lets '.' consume newline characters as well.
        def make_normalising_regexp_for(field)
          position = fields.index(field)

          leading  = fields[0...position].map { |f| length_match_for(f) }.join
          target   = length_match_for(field)
          trailing = fields[(position + 1)..-1].map { |f| length_match_for(f) }.join

          Regexp.new(format("^(%s)(%s)(%s)$", leading, target, trailing), Regexp::MULTILINE)
        end

        # Regexp fragment matching exactly +field.length+ characters.
        def length_match_for(field)
          '.{' + field.length.to_s + '}'
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
# Backfills Enumerable#enum_with_index on interpreters that do not provide it.
#
# The method is detected directly rather than by inspecting RUBY_VERSION:
# a version-string match such as /1[.]9/ leaves later interpreters without
# the method and can match unrelated versions (e.g. "2.1.9") by accident.
unless Enumerable.method_defined?(:enum_with_index)
  module Enumerable
    # Returns an enumerator that yields each element together with its
    # zero-based index.
    def enum_with_index
      map.with_index
    end
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the ascii-data-tools package.
module AsciiDataTools
  # Version string of this release.
  VERSION = '0.9'
end
@@ -0,0 +1,51 @@
1
+ require 'spec_helper'
2
+ require 'ascii-data-tools/configuration_printer'
3
+
4
+ module AsciiDataTools
5
describe RecordTypesConfigurationPrinter do
  # The printer is exercised against a mocked presenter that returns canned
  # headings and two canned summary rows.
  before do
    canned_responses = {
      :headings => ["type name", "total length", "constraints", "normalised fields"],
      :record_type_summaries => [%w[x y z w], %w[a b c d]]
    }
    @presenter = mock(RecordTypesConfigurationPresenter, canned_responses)
  end

  # Text produced by a fresh printer wrapping the mocked presenter.
  def printed_summary
    RecordTypesConfigurationPrinter.new(@presenter).summary
  end

  it "should print out the headers from the presenter" do
    expected_headings = ["type name", "total length", "constraints", "normalised fields"]
    printed_summary.should include(*expected_headings)
  end

  it "should print out the record type summaries" do
    expected_cells = %w[x y z w a b c d]
    printed_summary.should include(*expected_cells)
  end
end
21
+
22
describe RecordTypesConfigurationPresenter do
  include RecordTypeHelpers

  # Summary rows produced by a fresh presenter for the given record types.
  def summaries_for(record_types)
    RecordTypesConfigurationPresenter.new(record_types).record_type_summaries
  end

  it "should provide headings" do
    presenter = RecordTypesConfigurationPresenter.new(nil)
    presenter.headings.should == ["type name", "total length", "constraints", "normalised fields"]
  end

  it "should present every record type as a row" do
    record_types = [type("ABC"), type("DEF")]
    summaries_for(record_types)[0].should == ["ABC", 0, "", ""]
    summaries_for(record_types)[1].should == ["DEF", 0, "", ""]
  end

  it "should sort the record types by the total length" do
    five_long = type("longer") { field 'XYZ', :length => 5 }
    three_long = type("shorter") { field 'ABC', :length => 3 }
    # Deliberately supplied longest-first so the expected order differs.
    record_types = [five_long, three_long]
    summaries_for(record_types)[0].first.should == "shorter"
    summaries_for(record_types)[1].first.should == "longer"
  end

  it "should present the normalised fields" do
    abc_type = type("ABC") do
      field 'yyy', :length => 1
      field 'xxx', :length => 2, :normalised => true
    end
    summaries_for([abc_type])[0].should == ["ABC", 3, "", "xxx"]
  end
end
51
+ end
@@ -0,0 +1,153 @@
1
+ require 'spec_helper'
2
+ require 'stringio'
3
+
4
+ module AsciiDataTools
5
describe Configuration do
  # Configuration for the given command-line arguments with the record
  # types overridden by a placeholder string.
  def config_for(arguments)
    Configuration.new(arguments, :record_types => "record types")
  end

  it "should allow overwriting the input source, output stream, record types and user feedback stream" do
    source = mock("input source")
    sink = mock("output stream")
    overrides = {
      :input_sources        => [source],
      :output_stream        => sink,
      :record_types         => "record types",
      :user_feedback_stream => "user feedback stream"
    }

    config = Configuration.new([], overrides)

    config.output_stream.should == sink
    config.input_sources.should == [source]
    config.record_types.should == "record types"
    config.user_feedback_stream.should == "user feedback stream"
  end

  it "should not be valid unless the input stream is specified" do
    config = config_for([])
    config.should_not be_valid
    config.errors.should include("No input specified.")
  end

  it "should accept existing flat files as input" do
    path = "path/to/file"
    File.stub!(:exists?).with(path).and_return(true)
    File.should_receive(:open).with(path).and_return(mock(IO))

    config_for([path]).should be_valid
  end

  it "should reject non-existing flat files as input" do
    File.stub!(:exists?).with("path/to/file").and_return(false)

    config = config_for(["path/to/file"])

    config.should_not be_valid
    config.errors.should include("File path/to/file does not exist!")
  end

  it "should exit when passed invalid options" do
    config = config_for(["-xxx"])
    config.should_not be_valid
    config.errors.should include("invalid option: -xxx")
  end

  it "should load record types using autodiscovery by default" do
    AsciiDataTools.should_receive(:autodiscover).once
    AsciiDataTools.stub!(:record_types).and_return("record types")

    config = Configuration.new([])

    config.record_types.should == "record types"
  end

  it "should use the override for record types if specified" do
    AsciiDataTools.should_not_receive(:autodiscover)

    config = Configuration.new([], :record_types => "overriden record types")

    config.record_types.should == "overriden record types"
  end
end
57
+
58
describe InputSourceFactory do
  # First input source built from +args+ by a factory that expects exactly
  # one argument and accepts "-" as an argument.
  def source_from(args)
    factory = InputSourceFactory.new(:expected_argument_number => 1, :input_pipe_accepted => true)
    factory.input_sources_from(args).first
  end

  it "should use STDIN as the stream when - is the input argument" do
    source_from(["-"]).stream.should == STDIN
  end

  it "should raise an error if the path specified in the input argument does not exist" do
    lambda { source_from(["path/to/non-existent-file"]) }.should raise_error(/does not exist/)
  end

  it "should raise an error if the input parameters are empty" do
    lambda { source_from([]) }.should raise_error(/No input specified/)
  end

  it "should raise an error if the wrong number of input parameters is specified" do
    lambda { source_from(["x", "y"]) }.should raise_error(/2 input sources detected/i)
  end

  it "should process multiple input sources if so configured" do
    File.stub!(:exists?).with("path/to/file1").and_return(true)
    File.should_receive(:open).with("path/to/file1").and_return("IO stream 1")
    File.stub!(:exists?).with("path/to/file2").and_return(true)
    File.should_receive(:open).with("path/to/file2").and_return("IO stream 2")

    factory = InputSourceFactory.new(:expected_argument_number => 2)
    sources = factory.input_sources_from ["path/to/file1", "path/to/file2"]
    sources[0].stream.should == "IO stream 1"
    sources[1].stream.should == "IO stream 2"
  end

  it "should reject the input pipe as an argument if so configured" do
    factory = InputSourceFactory.new(:input_pipe_accepted => false)
    # The regexp is parenthesised: a bare `raise_error /STDIN/` is an
    # ambiguous first argument and differs from the sibling examples.
    lambda { factory.input_sources_from(["-"]) }.should raise_error(/STDIN/)
  end

  it "should open the file normally if the path specified in the input argument exists and the file is not gzipped" do
    File.stub!(:exists?).with("path/to/file").and_return(true)
    File.should_receive(:open).with("path/to/file").and_return("IO stream")

    source_from(["path/to/file"]).stream.should == "IO stream"
  end

  it "should open the file as a gzip read stream if the path specified in the input argument exists and the file is gzipped" do
    File.stub!(:exists?).with("path/to/file.gz").and_return(true)
    Zlib::GzipReader.should_receive(:open).with("path/to/file.gz").and_return("IO stream")

    source_from(["path/to/file.gz"]).stream.should == "IO stream"
  end
end
109
+
110
describe Editor do
  it "should write input streams to files" do
    collected = ""
    editor = Editor.new do |filenames|
      # Concatenate the contents of every file handed to the edit block.
      filenames.each { |filename| collected += File.read(filename) }
      collected
    end
    ["file1 ", "file2 ", "file3"].each_with_index do |text, slot|
      editor[slot] << text
    end

    editor.edit

    collected.should == "file1 file2 file3"
  end

  it "should detect when no changes were made during editing" do
    editor = Editor.new { |filenames| }
    editor[0] << "hello"

    editor.edit

    editor.changed?(0).should be_false
  end

  it "should detect when a change was made during editing" do
    first_reading = Time.new
    # Successive File.mtime calls return two timestamps one second apart.
    File.should_receive(:mtime).and_return(first_reading, first_reading + 1)

    editor = Editor.new { |filenames| }
    editor[0] << "hello"

    editor.edit

    editor.changed?(0).should be_true
  end
end
142
+
143
describe InputSource do
  it "should read a line from the input stream when prompted to read and should know when it's full or empty" do
    two_line_stream = StringIO.new("abc\ndef\n")
    source = InputSource.new("some file", two_line_stream)

    source.should have_records
    ["abc\n", "def\n"].each do |expected_line|
      source.read.should == expected_line
    end
    source.should_not have_records
  end
end
153
+ end
@@ -0,0 +1,8 @@
1
+ require 'spec_helper'
2
+
3
describe "the default configuration" do
  it "should add the EXAMPLE01 type to the configuration" do
    # Required inside the example, as in the original spec.
    # NOTE(review): presumably loading this file registers the built-in
    # record types - confirm against ascii-data-tools/discover.
    require 'ascii-data-tools/discover'

    example_type = AsciiDataTools.record_types.find_by_name("EXAMPLE01")
    example_type.should_not be_nil
  end
end
@@ -0,0 +1,82 @@
1
+ require 'spec_helper'
2
+ require 'filter_helper'
3
+
4
+ require 'ascii-data-tools/configuration'
5
+ require 'ascii-data-tools/filter'
6
+ require 'stringio'
7
+
8
+ module AsciiDataTools
9
+ module Filter
10
+ module Diffing
11
describe DiffExecutingFilter do
  it "should return the diff if the inputs are not the same" do
    left = input_source_containing("abc\ndef\n")
    right = input_source_containing("abc\ndef\nxyz\n")

    should output("2a3\n> xyz\n").from_upstream([left, right])
  end

  it "should raise an exception when the streams are the same" do
    identical_sources = Array.new(2) { input_source_containing("abc\ndef\n") }
    filter = DiffExecutingFilter.new
    filter << identical_sources

    writing = lambda { filter.write(StringIO.new) }
    writing.should raise_error(StreamsEqualException)
  end
end
22
+
23
describe DiffParsingFilter do
  # Returns a DiffParsingFilter whose upstream is an input source holding
  # +diff_text+ (diff output in the "NcN" / "NaN" / "NdN" format used by the
  # fixtures in the examples).
  def filter_reading(diff_text)
    filter = DiffParsingFilter.new
    filter << input_source_containing(diff_text)
    filter
  end

  it "should sieve the diffs into left and right lines" do
    difference = filter_reading("4c4,5\n< abc\n---\n> def\n> ghi\n").read
    difference.left_contents.should == ["abc\n"]
    difference.right_contents.should == ["def\n", "ghi\n"]
  end

  context "for conflicts" do
    it "should detect a one-line difference" do
      filter = filter_reading("4c4\n< abc\n---\n> def\n")
      filter.read.should be_a(Difference)
      filter.should_not have_records
    end

    it "should detect a multi-line difference" do
      filter = filter_reading("1,2c1,3\n< abc\n< def\n---\n> ghi\n> jkl\n> mno\n")
      filter.read.should be_a(Difference)
      filter.should_not have_records
    end
  end

  context "for additions" do
    it "should detect a one-line difference" do
      filter = filter_reading("1a2\n> def\n")
      filter.read.should be_a(Difference)
      filter.should_not have_records
    end

    it "should detect a multi-line difference" do
      filter = filter_reading("1a2,3\n> def\n> xyz\n")
      filter.read.should be_a(Difference)
      filter.should_not have_records
    end
  end

  context "for deletions" do
    it "should detect a one-line difference" do
      filter = filter_reading("1d2\n< def\n")
      filter.read.should be_a(Difference)
      filter.should_not have_records
    end

    it "should detect a multi-line difference" do
      # The stray backslash before the last '<' was dropped; "\<" and "<"
      # produce the same string, so the fixture text is unchanged.
      filter = filter_reading("1,3d2\n< def\n< xyz\n< wuv\n")
      filter.read.should be_a(Difference)
      filter.should_not have_records
    end
  end
end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,107 @@
1
+ require 'spec_helper'
2
+ require 'filter_helper'
3
+
4
+ require 'ascii-data-tools/configuration'
5
+ require 'ascii-data-tools/filter'
6
+ require 'ascii-data-tools/record_type'
7
+ require 'stringio'
8
+
9
+ module AsciiDataTools
10
+ module Filter
11
describe Filter do
  it "should read from 'upstream' and filter when reading" do
    upstream = mock("upstream object", :read => "abc\n")
    reversing_filter = Filter.new { |line| line.strip.reverse + "\n" }
    reversing_filter << upstream

    reversing_filter.read.should == "cba\n"
  end

  it "should read from upstream and write to given output" do
    reversing_filter = Filter.new { |line| line.strip.reverse + "\n" }

    reversing_filter.should output("cba\nfed\n").from_upstream("abc\ndef\n")
  end

  it "should be chainable" do
    mask_digits = Filter.new { |text| text.gsub(/\d/, "X") }
    count_masks = Filter.new { |text| text.count("X").to_s }
    pass_through = Filter.new { |text| text }

    # Chain: input source -> mask digits -> count the masks -> identity.
    pass_through << (count_masks << (mask_digits << input_source_containing("ab1cd2")))

    pass_through.read.should == "2"
  end
end
36
+
37
describe BufferingFilter do
  it "should buffer the upstream into a tempfile before the first read and then return it" do
    passthrough = BufferingFilter.new { |buffered_upstream_as_tempfile| buffered_upstream_as_tempfile }

    passthrough.should output("abc\ndef\n").from_upstream("abc\ndef\n")
  end

  it "should be chainable" do
    upcasing = BufferingFilter.new do |tempfile|
      shouted_lines = tempfile.readlines.map { |line| line.upcase }
      StringIO.new(shouted_lines.join(""))
    end
    suffixing = BufferingFilter.new do |tempfile|
      suffixed_lines = tempfile.readlines.map { |line| line.strip + "n" + "\n" }
      StringIO.new(suffixed_lines.join(""))
    end

    suffixing.should output("ABCn\nDEFn\n").from_upstream(upcasing, "abc\ndef\n")
  end
end
53
+
54
describe SortingFilter do
  it "should sort the given stream" do
    unsorted_lines = "xyz\nabc\ndef\n"
    sorted_lines = "abc\ndef\nxyz\n"

    should output(sorted_lines).from_upstream(unsorted_lines)
  end
end
59
+
60
# Decoded rendering of one record labelled "ABC" with three fields; fed to
# ParsingFilter in the examples below. The "\\n" yields a literal
# backslash followed by "n" inside the heredoc.
# NOTE(review): column-alignment whitespace inside these fixtures may have
# been collapsed by the rendering this text was taken from - confirm against
# the packaged spec file before relying on exact spacing.
DECODED_FIXED_LENGTH_RECORD = <<STR
Record 01 (ABC)
01 field1 : [12345]-----
02 field10 : [abc]-------
03 field3 : [\\n]--------

STR

# Decoded rendering of two consecutive records labelled "unknown", each with
# a single UNKNOWN field; fed to ParsingFilter in the examples below.
SEVERAL_FIXED_LENGTH_RECORDS = <<STR
Record 01 (unknown)
01 UNKNOWN : [12345]-----

Record 02 (unknown)
01 UNKNOWN : [abc]-----

STR
76
+
77
describe ParsingFilter do
  include RecordTypeHelpers
  include AsciiDataTools::Record
  include AsciiDataTools::RecordType

  it "should identify a decoded record and encode it" do
    # Field names mirror the DECODED_FIXED_LENGTH_RECORD fixture; the second
    # one was previously misspelt 'filed10'. The local is named abc_type so
    # it does not shadow the +type+ helper.
    abc_type = type("ABC") do
      field 'field1', :length => 5
      field 'field10', :length => 3
      field 'field3', :length => 1
    end
    record_types = mock(AsciiDataTools::RecordType::RecordTypeRepository)
    record_types.should_receive(:find_by_name).with("ABC").and_return(abc_type)

    filter = ParsingFilter.new(record_types)
    filter << input_source_containing(DECODED_FIXED_LENGTH_RECORD)

    filter.read.should == AsciiDataTools::Record::Record.new(abc_type, ["12345", "abc", "\n"])
  end

  # Renamed: this example previously had the same description as the one
  # above, which made the two indistinguishable in spec output.
  it "should identify several decoded records of unknown type and encode them" do
    unknown_type = AsciiDataTools::RecordType::UnknownType.new
    record_types = mock(AsciiDataTools::RecordType::RecordTypeRepository)
    # One repository lookup is expected per record in the fixture.
    record_types.should_receive(:find_by_name).with("unknown").twice.and_return(unknown_type)

    filter = ParsingFilter.new(record_types)
    filter << input_source_containing(SEVERAL_FIXED_LENGTH_RECORDS)

    filter.read.should == AsciiDataTools::Record::Record.new(unknown_type, ["12345"])
    filter.read.should == AsciiDataTools::Record::Record.new(unknown_type, ["abc"])
  end
end
106
+ end
107
+ end