ascii-data-tools 0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. data/.gitignore +3 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +40 -0
  6. data/LICENSE.GPL2 +339 -0
  7. data/README.rdoc +52 -0
  8. data/Rakefile +42 -0
  9. data/TODO +4 -0
  10. data/ascii-data-tools.gemspec +30 -0
  11. data/bin/ascii-data-cat +13 -0
  12. data/bin/ascii-data-edit +13 -0
  13. data/bin/ascii-data-norm +13 -0
  14. data/bin/ascii-data-qdiff +13 -0
  15. data/bin/ascii-data-tools-config +9 -0
  16. data/examples/big +10000 -0
  17. data/examples/built_in_records.gz +0 -0
  18. data/examples/slightly_modified_built_in_records.gz +0 -0
  19. data/features/ascii-data-cat.feature +110 -0
  20. data/features/ascii-data-edit.feature +91 -0
  21. data/features/ascii-data-qdiff.feature +54 -0
  22. data/features/encoding_decoding.feature +68 -0
  23. data/features/normaliser.feature +27 -0
  24. data/features/plugins.feature +73 -0
  25. data/features/record_recognition.feature +61 -0
  26. data/features/step_definitions/ascii-data-cat_steps.rb +48 -0
  27. data/features/step_definitions/ascii-data-edit_steps.rb +38 -0
  28. data/features/step_definitions/ascii-data-norm_steps.rb +7 -0
  29. data/features/step_definitions/ascii-data-qdiff_steps.rb +43 -0
  30. data/features/step_definitions/encoding_decoding_steps.rb +23 -0
  31. data/features/step_definitions/plugins_steps.rb +11 -0
  32. data/features/step_definitions/record_recognition_steps.rb +10 -0
  33. data/features/support/env.rb +5 -0
  34. data/lib/ascii-data-tools.rb +8 -0
  35. data/lib/ascii-data-tools/configuration.rb +169 -0
  36. data/lib/ascii-data-tools/configuration_printer.rb +38 -0
  37. data/lib/ascii-data-tools/controller.rb +123 -0
  38. data/lib/ascii-data-tools/discover.rb +19 -0
  39. data/lib/ascii-data-tools/external_programs.rb +23 -0
  40. data/lib/ascii-data-tools/filter.rb +148 -0
  41. data/lib/ascii-data-tools/filter/diffing.rb +139 -0
  42. data/lib/ascii-data-tools/formatting.rb +109 -0
  43. data/lib/ascii-data-tools/global_autodiscovery.rb +21 -0
  44. data/lib/ascii-data-tools/record.rb +50 -0
  45. data/lib/ascii-data-tools/record_type.rb +139 -0
  46. data/lib/ascii-data-tools/record_type/builder.rb +50 -0
  47. data/lib/ascii-data-tools/record_type/decoder.rb +77 -0
  48. data/lib/ascii-data-tools/record_type/encoder.rb +17 -0
  49. data/lib/ascii-data-tools/record_type/field.rb +168 -0
  50. data/lib/ascii-data-tools/record_type/normaliser.rb +38 -0
  51. data/lib/ascii-data-tools/ruby_extensions.rb +7 -0
  52. data/lib/ascii-data-tools/version.rb +3 -0
  53. data/spec/ascii-data-tools/configuration_printer_spec.rb +51 -0
  54. data/spec/ascii-data-tools/configuration_spec.rb +153 -0
  55. data/spec/ascii-data-tools/discover_spec.rb +8 -0
  56. data/spec/ascii-data-tools/filter/diffing_spec.rb +82 -0
  57. data/spec/ascii-data-tools/filter_spec.rb +107 -0
  58. data/spec/ascii-data-tools/formatting_spec.rb +106 -0
  59. data/spec/ascii-data-tools/record_spec.rb +49 -0
  60. data/spec/ascii-data-tools/record_type/builder_spec.rb +69 -0
  61. data/spec/ascii-data-tools/record_type/decoder_spec.rb +73 -0
  62. data/spec/ascii-data-tools/record_type/encoder_spec.rb +32 -0
  63. data/spec/ascii-data-tools/record_type/field_spec.rb +160 -0
  64. data/spec/ascii-data-tools/record_type/normaliser_spec.rb +25 -0
  65. data/spec/ascii-data-tools/record_type_spec.rb +175 -0
  66. data/spec/filter_helper.rb +24 -0
  67. data/spec/record_type_helpers.rb +8 -0
  68. data/spec/spec.opts +2 -0
  69. data/spec/spec_helper.rb +5 -0
  70. metadata +196 -0
@@ -0,0 +1,106 @@
1
+ require 'spec_helper'
2
+
3
+ EXPECTED_FORMATTING_OF_FIXED_LENGTH_RECORD = <<STR
4
+ Record 01 (ABC)
5
+ 01 field1 : [12345]-----
6
+ 02 field10 : [abc]-------
7
+ 03 field3 : [\\n]--------
8
+
9
+ STR
10
+
11
# Same expected text as the numbered variant, except that the header line
# carries no record number ("Record (ABC)"). Used by the
# UnnumberedFormatableType example below.
# NOTE(review): spacing inside the literal may have been collapsed by this
# listing -- confirm against the gem source.
+ EXPECTED_FORMATTING_OF_UNNUMBERED_FIXED_LENGTH_RECORD = <<STR
12
+ Record (ABC)
13
+ 01 field1 : [12345]-----
14
+ 02 field10 : [abc]-------
15
+ 03 field3 : [\\n]--------
16
+
17
+ STR
18
+
19
+ include RecordTypeHelpers
20
+
21
# Builds the record type named "ABC" used by the Formatter example: three
# fields with lengths 5, 3 and 1.
# `type` and `field` are not defined in this listing -- presumably they come
# from RecordTypeHelpers (included just above); verify.
+ def fixed_length_type
22
+ type("ABC") do
23
+ field 'field1', :length => 5
24
+ field 'field10', :length => 3
25
+ field 'field3', :length => 1
26
+ end
27
+ end
28
+
29
+ module AsciiDataTools
30
+ module Formatting
31
# Checks that Formatter#format renders a complete Record exactly as spelled
# out in EXPECTED_FORMATTING_OF_FIXED_LENGTH_RECORD.
+ describe Formatter do
32
+ it "should format a fixed-length record" do
33
# The third value is a real newline; the expected text shows it escaped.
+ record = AsciiDataTools::Record::Record.new(fixed_length_type, ["12345", "abc", "\n"])
34
+ Formatter.new.format(record).should == EXPECTED_FORMATTING_OF_FIXED_LENGTH_RECORD
35
+ end
36
+ end
37
+
38
# Exercises FormatableType in isolation: the module is mixed into a Struct
# stand-in that exposes name, fields and length_of_longest_field_name, rather
# than into a real record type.
+ describe FormatableType do
39
+ include RecordTypeHelpers
40
+ before do
41
+ @fields = [
42
+ make_field("field1", :length => 5),
43
+ make_field("field10", :length => 3),
44
+ make_field("field3", :length => 1)
45
+ ]
46
# 7 is the length of the longest field name above ("field10").
+ @type = Struct.new(:name, :fields, :length_of_longest_field_name).new("ABC", @fields, 7)
47
+ @type.extend(FormatableType)
48
+ end
49
+
50
+ it "should format values" do
51
# The first argument (1) is presumably the record number rendered as
# "Record 01" in the expected text -- confirm against FormatableType#format.
+ @type.format(1, ["12345", "abc", "\n"]).should == EXPECTED_FORMATTING_OF_FIXED_LENGTH_RECORD
52
+ end
53
+ end
54
+
55
# Exercises FormatableField#template. Each example passes the field position
# and the longest name/length seen in the type, and expects back a string
# that contains a %s placeholder for the value.
# NOTE(review): the rule that maps the length options to the number of
# trailing dashes is not visible in this listing; see FormatableField itself.
+ describe FormatableField do
56
+ include RecordTypeHelpers
57
+ before do
58
+ @field = make_field("field1", :length => 5)
59
+ @field.extend(FormatableField)
60
+ end
61
+
# Field length (5) equals :longest_field_length.
62
+ it "should make a template for the longest field" do
63
+ @field.template(:position => 3, :longest_field_name => 7, :longest_field_length => 5).should == "03 field1 : [%s]-----"
64
+ end
65
+
# Field length (5) is below :longest_field_length (7); more dashes expected.
66
+ it "should make a template for fields that are not the longest" do
67
+ @field.template(:position => 3, :longest_field_name => 7, :longest_field_length => 7).should == "03 field1 : [%s]-------"
68
+ end
69
+
# Only this example passes :last_field? => true.
70
+ it "should make a template for the last field (which has a newline in it)" do
71
+ field = make_field("NEWLINE", :length => 1)
72
+ field.extend(FormatableField)
73
+ field.template(:position => 3,
74
+ :longest_field_name => 7,
75
+ :longest_field_length => 2,
76
+ :last_field? => true).should == "03 NEWLINE : [%s]-----"
77
+ end
78
+
# A mock that answers only #name stands in for a field that has no length.
79
+ it "should make a template for fields without lengths" do
80
+ field = mock("field", :name => "ABC")
81
+ field.extend(FormatableField)
82
+ field.template(:position => 3,
83
+ :longest_field_name => 3,
84
+ :longest_field_length => 2).should == "03 ABC : [%s]-----"
85
+ end
86
+ end
87
+
88
# Same setup as the FormatableType examples, but mixing in
# UnnumberedFormatableType: the expected header omits the record number even
# though 1 is still passed as the first argument to #format.
+ describe UnnumberedFormatableType do
89
+ include RecordTypeHelpers
90
+ before do
91
+ @fields = [
92
+ make_field("field1", :length => 5),
93
+ make_field("field10", :length => 3),
94
+ make_field("field3", :length => 1)
95
+ ]
96
# 7 is the length of the longest field name above ("field10").
+ @type = Struct.new(:name, :fields, :length_of_longest_field_name).new("ABC", @fields, 7)
97
+ @type.extend(UnnumberedFormatableType)
98
+ end
99
+
100
+ it "should format a record" do
101
+ # record = AsciiDataTools::Record::Record.new(fixed_length_type, ["12345", "abc", "\n"])
102
+ @type.format(1, ["12345", "abc", "\n"]).should == EXPECTED_FORMATTING_OF_UNNUMBERED_FIXED_LENGTH_RECORD
103
+ end
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+
3
+ module AsciiDataTools
4
+ module Record
5
# Specs for Record, which is constructed from a record type plus an array of
# values. The type is always an RSpec mock that stubs only the methods the
# individual example needs (name, field_names or encode).
+ describe Record do
6
+ include AsciiDataTools::RecordType
7
+ it "should provide the type name" do
8
+ Record.new(mock(AsciiDataTools::RecordType::Type, :name => "ABC"), nil).type_name.should == "ABC"
9
+ end
10
+
# Values are looked up by field name; names and values pair up by position.
11
+ it "should provide access to its contents" do
12
+ record = Record.new(mock(AsciiDataTools::RecordType::Type, :field_names => ["field1", "field2"]), ["xyz", "abc"])
13
+ record["field1"].should == "xyz"
14
+ record["field2"].should == "abc"
15
+ end
16
+
# to_a yields [name, value] pairs in field order.
17
+ it "should be representable as an array" do
18
+ Record.new(mock(AsciiDataTools::RecordType::Type, :field_names => ["field1", "field2"]), ["xyz", "abc"]).to_a.should == [
19
+ ["field1", "xyz"],
20
+ ["field2", "abc"]
21
+ ]
22
+ end
23
+
# The expected string is single-quoted, so the newline value must appear
# escaped (backslash + n) in the output of to_s.
24
+ it "should have a string representation which includes the type name and key-value pairs" do
25
+ record_type = mock(AsciiDataTools::RecordType::Type, :name => "ABC", :field_names => ["field1", "field2"])
26
+ Record.new(record_type, ["xyz", "\n"]).to_s.should == 'ABC: field1 => "xyz", field2 => "\n"'
27
+ end
28
+
# Record#encode must hand its values array to the type's #encode and return
# whatever the type returns.
29
+ it "should defer encoding to the record type" do
30
+ type = mock(AsciiDataTools::RecordType::Type)
31
+ type.should_receive(:encode).with(["abc", "xyz"]).and_return("encoded string")
32
+ Record.new(type, ["abc", "xyz"]).encode.should == "encoded string"
33
+ end
34
+
# Same type and same values compare equal; a different type or different
# values do not.
35
+ it "should be comparable to other records" do
36
+ type = mock(AsciiDataTools::RecordType::Type, :field_names => ["field1", "field2"])
37
+ another_type = mock(AsciiDataTools::RecordType::Type, :field_names => ["field1", "field3"])
38
+
39
+ Record.new(type, ["abc", "def"]).should == Record.new(type, ["abc", "def"])
40
+ Record.new(type, ["abc", "def"]).should_not == Record.new(another_type, ["abc", "def"])
41
+ Record.new(type, ["abc", "def"]).should_not == Record.new(type, ["abc", "xyz"])
42
+ end
43
+
# The error message must name the missing field and say it does not exist.
44
+ it "should raise an error when trying to access a non-existing field" do
45
+ lambda {Record.new(mock(AsciiDataTools::RecordType::Type, :field_names => ["existing"]), ["xyz"])['non-existing'] }.should raise_error(/non-existing.*does not exist/)
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,69 @@
1
+ require 'spec_helper'
2
+
3
+ module AsciiDataTools
4
+ module RecordType
5
+ module Builder
6
# Specs for TypeBuilder#build_type. TypeBuilder is included into the example
# group, so build_type is called directly inside the examples.
+ describe TypeBuilder do
7
+ include TypeBuilder
8
+ it "should create a type with the given name" do
9
+ build_type("ABC").name.should == "ABC"
10
+ end
11
+
# The ascii string is empty in both calls, so only the filename decides
# whether the type can decode.
12
+ it "should create a type which matches only for specific filenames (if given)" do
13
+ type = build_type("ABC", :applies_for_filenames_matching => /ABC/)
14
+ type.should be_able_to_decode(:ascii_string => "", :filename => "ABC.gz")
15
+ type.should_not be_able_to_decode(:ascii_string => "", :filename => "XYZ.gz")
16
+ end
17
+
# Fields declared with :length produce a FixedLengthType.
18
+ context "for fixed-length types" do
19
+ before do
20
+ @record_type = build_type("ABC") do
21
+ field "RECORD_TYPE", :length => 3, :constrained_to => "ABC"
22
+ field "A_NUMBER", :length => 16, :constrained_to => /123/
23
+ field "RECORD_NUMBER", :length => 5, :normalised => true
24
+ field "END_OF_RECORD", :length => 1
25
+ end
26
+ end
27
+
28
+ it "should really build a fixed-length type" do
29
+ @record_type.should be_a(AsciiDataTools::RecordType::FixedLengthType)
30
+ end
31
+
32
+ it "should have the correct fields" do
33
+ @record_type.field_names.should == ["RECORD_TYPE", "A_NUMBER", "RECORD_NUMBER", "END_OF_RECORD"]
34
+ end
35
+
# 3 + 16 + 5 + 1 = 25
36
+ it "should have the correct length" do
37
+ @record_type.total_length_of_fields.should == 25
38
+ end
39
+
# Only the two fields declared with :constrained_to appear; the string
# constraint renders with "=" and the regexp constraint with "=~".
40
+ it "should have the correct constraints" do
41
+ @record_type.constraints_description.should == "RECORD_TYPE = ABC, A_NUMBER =~ /123/"
42
+ end
43
+
44
+ it "should normalise fields" do
45
+ @record_type.field_with_name("RECORD_NUMBER").should be_normalised
46
+ end
47
+ end
48
+
# :family => "csv" produces a CsvType; fields are declared without :length.
49
+ context "for csv types" do
50
+ before do
51
+ @record_type = build_type("ABC", :family => "csv", :divider => ";") do
52
+ field "RECORD_TYPE"
53
+ field "A_NUMBER"
54
+ field "RECORD_NUMBER"
55
+ end
56
+ end
57
+
58
+ it "should really build a csv type" do
59
+ @record_type.should be_a(AsciiDataTools::RecordType::CsvType)
60
+ end
61
+
# The divider is retrieved as the value of a field named :divider.
62
+ it "should save the divider" do
63
+ @record_type.field_with_name(:divider).value.should == ";"
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,73 @@
1
+ require 'spec_helper'
2
+
3
+ module AsciiDataTools
4
+ module RecordType
5
+ module Decoder
6
# Although the group is described as RecordDecoder, the module actually mixed
# in and exercised is FixedLengthRecordDecoder. The host object is a Struct
# stand-in exposing content_fields and fields_by_type.
+ describe RecordDecoder do
7
+ include RecordTypeHelpers
8
+ before do
# Field lengths 3 + 5 + 1: a decodable string is exactly 9 characters.
9
+ @fields = [
10
+ make_field("field100", :length => 3),
11
+ make_field("field1", :length => 5),
12
+ make_field("field10", :length => 1)
13
+ ]
14
# The :meta entry holds a filename field constrained to /abc/.
+ @type = Struct.new(:content_fields, :fields_by_type).new(@fields, {:meta => fields do field(:filename, :constrained_to => /abc/) end})
15
+ @type.extend(RecordTypeHelpers)
16
+ @type.extend(FixedLengthRecordDecoder)
17
+ end
18
+
19
+ it "should know whether a given string is decodable" do
20
+ @type.should be_able_to_decode(:ascii_string => "ABC12345\n")
21
+ end
22
+
# The decoded values follow the 3/5/1 split, newline included.
23
+ it "should decode records correctly" do
24
+ @type.decode(:ascii_string => "XYZ12345\n").values.should == ["XYZ", "12345", "\n"]
25
+ end
26
+
27
+ it "should take into account constraints that are set on the fields" do
28
# should_be_constrained_to configures the first field (field100); it is a
# method of the field, not an RSpec expectation.
+ @type.content_fields[0].should_be_constrained_to("ABC")
29
+ @type.should be_able_to_decode(:ascii_string => "ABC12345\n")
30
+ @type.should_not be_able_to_decode(:ascii_string => "XYZ12345\n")
31
+ end
32
+
# One character too short (8), then one too long (10).
33
+ it "should not match ascii strings that don't have the same length as the individual fields" do
34
+ @type.should_not be_able_to_decode(:ascii_string => "ABC1234\n")
35
+ @type.should_not be_able_to_decode(:ascii_string => "ABC123456\n")
36
+ end
37
+
# With no :filename given the type still decodes; a filename is only a
# reason to reject when it fails the /abc/ constraint.
38
+ it "should not be able to decode if the filename field is not matching" do
39
+ @type.should be_able_to_decode(:ascii_string => "ABC12345\n")
40
+ @type.should be_able_to_decode(:ascii_string => "ABC12345\n", :filename => "abc")
41
+ @type.should_not be_able_to_decode(:ascii_string => "ABC12345\n", :filename => "def")
42
+ end
43
+ end
44
+
45
# Exercises CsvRecordDecoder mixed into a Struct stand-in that exposes only
# content_fields.
+ describe CsvRecordDecoder do
46
+ include RecordTypeHelpers
47
+ before do
48
+ @fields = [
49
+ make_field("field1"),
50
+ make_field("field2"),
51
+ make_field("field3")
52
+ ]
53
+ @type = Struct.new(:content_fields).new(@fields)
54
# field_with_name(:divider) is stubbed to return a field whose value is ";".
+ @type.stub!(:field_with_name).with(:divider).and_return(mock("divider field", :value => ";"))
55
+ @type.extend(CsvRecordDecoder)
56
+ end
57
+
# The trailing newline does not end up in any decoded value.
58
+ it "should decode records correctly" do
59
+ @type.decode(:ascii_string => "X;Y;Z\n").values.should == ["X", "Y", "Z"]
60
+ end
61
+ end
62
+
63
# Exercises UnknownRecordDecoder mixed into a Struct stand-in whose only
# field name is "UNKNOWN".
+ describe UnknownRecordDecoder do
64
+ it "should decode the entire ascii string into the UNKNOWN field" do
65
+ decoder = Struct.new(:field_names).new(["UNKNOWN"])
66
+ decoder.extend(UnknownRecordDecoder)
67
+ record = decoder.decode(:ascii_string => "any string\n")
68
# The whole input, trailing newline included, lands in the single field.
+ record["UNKNOWN"].should == "any string\n"
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+
3
+ module AsciiDataTools
4
+ module RecordType
5
+ module Encoder
6
# Exercises FixedLengthRecordEncoder mixed into a bare Object; the example
# needs nothing else from the host.
+ describe FixedLengthRecordEncoder do
7
+ before do
8
+ @type = Object.new
9
+ @type.extend(FixedLengthRecordEncoder)
10
+ end
11
+
12
+ context "fixed length records" do
# Values are expected back concatenated in order, with no separator.
13
+ it "should pack the values of fixed length records back together" do
14
+ @type.encode(["123", "abc", "\n"]).should == "123abc\n"
15
+ end
16
+ end
17
+ end
18
+
19
# Exercises CsvRecordEncoder mixed into a bare Object whose
# field_with_name(:divider) is stubbed to return a field with value ";".
+ describe CsvRecordEncoder do
20
+ before do
21
+ @type = Object.new
22
+ @type.stub!(:field_with_name).with(:divider).and_return(mock("divider field", :value => ";"))
23
+ @type.extend(CsvRecordEncoder)
24
+ end
25
+
# Values are joined with the divider and a newline is appended; the input
# values themselves contain no newline.
26
+ it "should pack the values of csv records back together" do
27
+ @type.encode(["123", "abc", "XYZ"]).should == "123;abc;XYZ\n"
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,160 @@
1
+ require 'spec_helper'
2
+
3
+ module AsciiDataTools
4
+ module RecordType
5
+ module Field
6
# Specs for the Fields collection. The collection is built with the
# fields/field helpers, which are not defined in this listing -- presumably
# they come from RecordTypeHelpers; verify.
# Note: should_be_normalised and should_be_constrained_to below are methods
# of the objects under test that configure them; they are not RSpec
# expectations.
+ describe Fields do
7
+ include RecordTypeHelpers
8
+
9
+ before do
10
+ @fields = fields do
11
+ field "field100"
12
+ field "field1"
13
+ field "field10"
14
+ end
15
+ end
16
+
17
+ it "should provide the field names" do
18
+ @fields.names.should == ["field100", "field1", "field10"]
19
+ end
20
+
21
+ it "should provide the number of content fields" do
22
+ @fields.number_of_content_fields.should == 3
23
+ end
24
+
# "field100" is 8 characters long.
25
+ it "should provide the length of the field with the longest name" do
26
+ @fields.length_of_longest_field_name.should == 8
27
+ end
28
+
29
+ it "should provide an empty constraints description when there are no constraints" do
30
+ @fields.constraints_description.should be_empty
31
+ end
32
+
# Descriptions follow field order and are joined with ", ".
33
+ it "should provide a list of comma-delimited field constraints as the constraints description" do
34
+ @fields.with_name("field100").should_be_constrained_to("ABC")
35
+ @fields.constraints_description.should == "field100 = ABC"
36
+
37
+ @fields.with_name("field10").should_be_constrained_to("DEF")
38
+ @fields.constraints_description.should == "field100 = ABC, field10 = DEF"
39
+ end
40
+
# The regexp requires two or more digits, so field100 and field10 match and
# field1 does not. The result answers with_name, returning nil for a name
# that was filtered out.
41
+ it "should support searching for specific fields" do
42
+ found_fields = @fields.fields_with {|field| field.name =~ /field\d{2,}/}
43
+ found_fields.should have(2).items
44
+ found_fields.with_name("field1").should be_nil
45
+ end
46
+
# Calling should_be_normalised on the collection marks every field.
47
+ it "should allow mass normalisation" do
48
+ @fields.should_be_normalised
49
+ ["field100", "field1", "field10"].each do |field_name|
50
+ @fields.with_name(field_name).should be_normalised
51
+ end
52
+ end
53
+
# The names come back as one comma-separated string, not an array.
54
+ it "should provide names of normalised fields" do
55
+ @fields.with_name("field1").should_be_normalised
56
+ @fields.with_name("field10").should_be_normalised
57
+ @fields.names_of_normalised_fields.should == "field1, field10"
58
+ end
59
+
# Index 2 yields the second declared field, so indexing appears to be 1-based.
60
+ it "should find fields by index" do
61
+ @fields.with_index(2).name.should == "field1"
62
+ end
63
+ end
64
+
65
# Specs for a plain Field: it carries a name and, optionally, a constraint
# that is used both for its description and for validating input.
+ describe Field do
66
+ it "should have a name" do
67
+ Field.new("name").name.should == "name"
68
+ end
69
+
70
+ context "a constrained field" do
71
+ before do
72
+ @field = Field.new("name")
73
# Configures the field (not an RSpec expectation).
+ @field.should_be_constrained_to("abc")
74
+ end
75
+
# The description is the field name followed by the constraint text.
76
+ it "should provide a text description of the constraint" do
77
+ @field.constraint_description.should == "name = abc"
78
+ end
79
+
80
+ it "should validate input" do
81
+ @field.should be_a_valid_input("abc")
82
+ end
83
+
84
+ it "should invalidate input" do
85
+ @field.should_not be_a_valid_input("def")
86
+ end
87
+ end
88
+ end
89
+
90
# Specs for FixedLengthField, constructed from a name and a length (either
# may be nil where the example does not need it).
+ describe FixedLengthField do
91
+ it "should have a name" do
92
+ FixedLengthField.new("name", nil).name.should == "name"
93
+ end
94
+
# With no explicit constraint, a capture group of exactly `length`
# characters is appended to the regexp string passed in.
95
+ it "should contribute to the regexp string used for type matching" do
96
+ FixedLengthField.new(nil, 5).extend_regexp_string_for_matching("xxx").should == "xxx(.{5})"
97
+ end
98
+
# With a constraint set, the constraint's result is returned as-is and the
# length-based group does not appear.
99
+ it "should let the constraint contribute to the regexp string used for type matching if it is set" do
100
+ field = FixedLengthField.new(nil, 5)
101
+ field.constraint = mock("field constraint", :extend_regexp_string_for_matching => "xxxabc")
102
+ field.extend_regexp_string_for_matching("xxx").should == "xxxabc"
103
+ end
104
+ end
105
+
106
# ConstantField: constructed with a single argument ("XXX" here) and exposes
# a writable value attribute.
+ describe ConstantField do
107
+ it "should be able to store a value" do
108
+ field = ConstantField.new("XXX")
109
+ field.value = "abc"
110
+ field.value.should == "abc"
111
+ end
112
+ end
113
+
114
# NoConstraint: satisfied_by? is expected to hold for an arbitrary object.
+ describe NoConstraint do
115
+ it "should always be satisfied" do
116
+ NoConstraint.new.should be_satisfied_by(Object.new)
117
+ end
118
+ end
119
+
120
# FixedLengthConstraint: constructed with a character count.
+ describe FixedLengthConstraint do
# Appends a capture group matching exactly that many characters.
121
+ it "should contribute to a regexp string by limiting the number of characters" do
122
+ FixedLengthConstraint.new(5).extend_regexp_string_for_matching('xxx').should == "xxx(.{5})"
123
+ end
124
+
# to_s is empty, so this constraint adds no text of its own when rendered.
125
+ it "should have an empty string representation" do
126
+ FixedLengthConstraint.new(5).to_s.should be_empty
127
+ end
128
+ end
129
+
130
# OneOfConstraint: constrains a value to a fixed set of alternatives.
# The values are passed as separate arguments in the first two examples and
# as a single array in the last one; both call forms appear to be accepted.
+ describe OneOfConstraint do
# Appends a capture group with the alternatives separated by "|".
131
+ it "should contribute to a regexp string that matches the type" do
132
+ OneOfConstraint.new("ABC", "DEF", "XYZ").extend_regexp_string_for_matching("xxx").should == "xxx(ABC|DEF|XYZ)"
133
+ end
134
+
135
+ it "should have the appropriate string representation when there is one possible value" do
136
+ OneOfConstraint.new("ABC").to_s.should == "= ABC"
137
+ end
138
+
139
+ it "should have the appropriate string representation when there is more than one value possible" do
140
+ OneOfConstraint.new(["ABC", "DEF"]).to_s.should == "one of ABC, DEF"
141
+ end
142
+ end
143
+
144
# RegexpConstraint: constrains a value with a regular expression.
+ describe RegexpConstraint do
# The regexp source is appended as-is, with no surrounding capture group.
145
+ it "should contribute to a regexp string that matches the type" do
146
+ RegexpConstraint.new(/A\d{3}C/).extend_regexp_string_for_matching("xxx").should == "xxxA\\d{3}C"
147
+ end
148
+
# Matching is unanchored: /ABC/ is satisfied by "xyz.ABC.gz".
149
+ it "should be satisfied when the string passed to it matches its regexp" do
150
+ RegexpConstraint.new(/ABC/).should be_satisfied_by("xyz.ABC.gz")
151
+ RegexpConstraint.new(/ABC/).should_not be_satisfied_by("xyz.UVW.gz")
152
+ end
153
+
154
+ it "should have an appropriate string representation" do
155
+ RegexpConstraint.new(/ABC/).to_s.should == "=~ /ABC/"
156
+ end
157
+ end
158
+ end
159
+ end
160
+ end