ascii-data-tools 0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. data/.gitignore +3 -0
  2. data/.rvmrc +1 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +40 -0
  6. data/LICENSE.GPL2 +339 -0
  7. data/README.rdoc +52 -0
  8. data/Rakefile +42 -0
  9. data/TODO +4 -0
  10. data/ascii-data-tools.gemspec +30 -0
  11. data/bin/ascii-data-cat +13 -0
  12. data/bin/ascii-data-edit +13 -0
  13. data/bin/ascii-data-norm +13 -0
  14. data/bin/ascii-data-qdiff +13 -0
  15. data/bin/ascii-data-tools-config +9 -0
  16. data/examples/big +10000 -0
  17. data/examples/built_in_records.gz +0 -0
  18. data/examples/slightly_modified_built_in_records.gz +0 -0
  19. data/features/ascii-data-cat.feature +110 -0
  20. data/features/ascii-data-edit.feature +91 -0
  21. data/features/ascii-data-qdiff.feature +54 -0
  22. data/features/encoding_decoding.feature +68 -0
  23. data/features/normaliser.feature +27 -0
  24. data/features/plugins.feature +73 -0
  25. data/features/record_recognition.feature +61 -0
  26. data/features/step_definitions/ascii-data-cat_steps.rb +48 -0
  27. data/features/step_definitions/ascii-data-edit_steps.rb +38 -0
  28. data/features/step_definitions/ascii-data-norm_steps.rb +7 -0
  29. data/features/step_definitions/ascii-data-qdiff_steps.rb +43 -0
  30. data/features/step_definitions/encoding_decoding_steps.rb +23 -0
  31. data/features/step_definitions/plugins_steps.rb +11 -0
  32. data/features/step_definitions/record_recognition_steps.rb +10 -0
  33. data/features/support/env.rb +5 -0
  34. data/lib/ascii-data-tools.rb +8 -0
  35. data/lib/ascii-data-tools/configuration.rb +169 -0
  36. data/lib/ascii-data-tools/configuration_printer.rb +38 -0
  37. data/lib/ascii-data-tools/controller.rb +123 -0
  38. data/lib/ascii-data-tools/discover.rb +19 -0
  39. data/lib/ascii-data-tools/external_programs.rb +23 -0
  40. data/lib/ascii-data-tools/filter.rb +148 -0
  41. data/lib/ascii-data-tools/filter/diffing.rb +139 -0
  42. data/lib/ascii-data-tools/formatting.rb +109 -0
  43. data/lib/ascii-data-tools/global_autodiscovery.rb +21 -0
  44. data/lib/ascii-data-tools/record.rb +50 -0
  45. data/lib/ascii-data-tools/record_type.rb +139 -0
  46. data/lib/ascii-data-tools/record_type/builder.rb +50 -0
  47. data/lib/ascii-data-tools/record_type/decoder.rb +77 -0
  48. data/lib/ascii-data-tools/record_type/encoder.rb +17 -0
  49. data/lib/ascii-data-tools/record_type/field.rb +168 -0
  50. data/lib/ascii-data-tools/record_type/normaliser.rb +38 -0
  51. data/lib/ascii-data-tools/ruby_extensions.rb +7 -0
  52. data/lib/ascii-data-tools/version.rb +3 -0
  53. data/spec/ascii-data-tools/configuration_printer_spec.rb +51 -0
  54. data/spec/ascii-data-tools/configuration_spec.rb +153 -0
  55. data/spec/ascii-data-tools/discover_spec.rb +8 -0
  56. data/spec/ascii-data-tools/filter/diffing_spec.rb +82 -0
  57. data/spec/ascii-data-tools/filter_spec.rb +107 -0
  58. data/spec/ascii-data-tools/formatting_spec.rb +106 -0
  59. data/spec/ascii-data-tools/record_spec.rb +49 -0
  60. data/spec/ascii-data-tools/record_type/builder_spec.rb +69 -0
  61. data/spec/ascii-data-tools/record_type/decoder_spec.rb +73 -0
  62. data/spec/ascii-data-tools/record_type/encoder_spec.rb +32 -0
  63. data/spec/ascii-data-tools/record_type/field_spec.rb +160 -0
  64. data/spec/ascii-data-tools/record_type/normaliser_spec.rb +25 -0
  65. data/spec/ascii-data-tools/record_type_spec.rb +175 -0
  66. data/spec/filter_helper.rb +24 -0
  67. data/spec/record_type_helpers.rb +8 -0
  68. data/spec/spec.opts +2 -0
  69. data/spec/spec_helper.rb +5 -0
  70. metadata +196 -0
@@ -0,0 +1,106 @@
1
+ require 'spec_helper'
2
+
3
+ EXPECTED_FORMATTING_OF_FIXED_LENGTH_RECORD = <<STR
4
+ Record 01 (ABC)
5
+ 01 field1 : [12345]-----
6
+ 02 field10 : [abc]-------
7
+ 03 field3 : [\\n]--------
8
+
9
+ STR
10
+
11
+ EXPECTED_FORMATTING_OF_UNNUMBERED_FIXED_LENGTH_RECORD = <<STR
12
+ Record (ABC)
13
+ 01 field1 : [12345]-----
14
+ 02 field10 : [abc]-------
15
+ 03 field3 : [\\n]--------
16
+
17
+ STR
18
+
19
+ include RecordTypeHelpers
20
+
21
+ def fixed_length_type
22
+ type("ABC") do
23
+ field 'field1', :length => 5
24
+ field 'field10', :length => 3
25
+ field 'field3', :length => 1
26
+ end
27
+ end
28
+
29
+ module AsciiDataTools
30
+ module Formatting
31
+ describe Formatter do
32
+ it "should format a fixed-length record" do
33
+ record = AsciiDataTools::Record::Record.new(fixed_length_type, ["12345", "abc", "\n"])
34
+ Formatter.new.format(record).should == EXPECTED_FORMATTING_OF_FIXED_LENGTH_RECORD
35
+ end
36
+ end
37
+
38
+ describe FormatableType do
39
+ include RecordTypeHelpers
40
+ before do
41
+ @fields = [
42
+ make_field("field1", :length => 5),
43
+ make_field("field10", :length => 3),
44
+ make_field("field3", :length => 1)
45
+ ]
46
+ @type = Struct.new(:name, :fields, :length_of_longest_field_name).new("ABC", @fields, 7)
47
+ @type.extend(FormatableType)
48
+ end
49
+
50
+ it "should format values" do
51
+ @type.format(1, ["12345", "abc", "\n"]).should == EXPECTED_FORMATTING_OF_FIXED_LENGTH_RECORD
52
+ end
53
+ end
54
+
55
+ describe FormatableField do
56
+ include RecordTypeHelpers
57
+ before do
58
+ @field = make_field("field1", :length => 5)
59
+ @field.extend(FormatableField)
60
+ end
61
+
62
+ it "should make a template for the longest field" do
63
+ @field.template(:position => 3, :longest_field_name => 7, :longest_field_length => 5).should == "03 field1 : [%s]-----"
64
+ end
65
+
66
+ it "should make a template for fields that are not the longest" do
67
+ @field.template(:position => 3, :longest_field_name => 7, :longest_field_length => 7).should == "03 field1 : [%s]-------"
68
+ end
69
+
70
+ it "should make a template for the last field (which has a newline in it)" do
71
+ field = make_field("NEWLINE", :length => 1)
72
+ field.extend(FormatableField)
73
+ field.template(:position => 3,
74
+ :longest_field_name => 7,
75
+ :longest_field_length => 2,
76
+ :last_field? => true).should == "03 NEWLINE : [%s]-----"
77
+ end
78
+
79
+ it "should make a template for fields without lengths" do
80
+ field = mock("field", :name => "ABC")
81
+ field.extend(FormatableField)
82
+ field.template(:position => 3,
83
+ :longest_field_name => 3,
84
+ :longest_field_length => 2).should == "03 ABC : [%s]-----"
85
+ end
86
+ end
87
+
88
+ describe UnnumberedFormatableType do
89
+ include RecordTypeHelpers
90
+ before do
91
+ @fields = [
92
+ make_field("field1", :length => 5),
93
+ make_field("field10", :length => 3),
94
+ make_field("field3", :length => 1)
95
+ ]
96
+ @type = Struct.new(:name, :fields, :length_of_longest_field_name).new("ABC", @fields, 7)
97
+ @type.extend(UnnumberedFormatableType)
98
+ end
99
+
100
+ it "should format a record" do
101
+ # record = AsciiDataTools::Record::Record.new(fixed_length_type, ["12345", "abc", "\n"])
102
+ @type.format(1, ["12345", "abc", "\n"]).should == EXPECTED_FORMATTING_OF_UNNUMBERED_FIXED_LENGTH_RECORD
103
+ end
104
+ end
105
+ end
106
+ end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+
3
+ module AsciiDataTools
4
+ module Record
5
+ describe Record do
6
+ include AsciiDataTools::RecordType
7
+ it "should provide the type name" do
8
+ Record.new(mock(AsciiDataTools::RecordType::Type, :name => "ABC"), nil).type_name.should == "ABC"
9
+ end
10
+
11
+ it "should provide access to its contents" do
12
+ record = Record.new(mock(AsciiDataTools::RecordType::Type, :field_names => ["field1", "field2"]), ["xyz", "abc"])
13
+ record["field1"].should == "xyz"
14
+ record["field2"].should == "abc"
15
+ end
16
+
17
+ it "should be representable as an array" do
18
+ Record.new(mock(AsciiDataTools::RecordType::Type, :field_names => ["field1", "field2"]), ["xyz", "abc"]).to_a.should == [
19
+ ["field1", "xyz"],
20
+ ["field2", "abc"]
21
+ ]
22
+ end
23
+
24
+ it "should have a string representation which includes the type name and key-value pairs" do
25
+ record_type = mock(AsciiDataTools::RecordType::Type, :name => "ABC", :field_names => ["field1", "field2"])
26
+ Record.new(record_type, ["xyz", "\n"]).to_s.should == 'ABC: field1 => "xyz", field2 => "\n"'
27
+ end
28
+
29
+ it "should defer encoding to the record type" do
30
+ type = mock(AsciiDataTools::RecordType::Type)
31
+ type.should_receive(:encode).with(["abc", "xyz"]).and_return("encoded string")
32
+ Record.new(type, ["abc", "xyz"]).encode.should == "encoded string"
33
+ end
34
+
35
+ it "should be comparable to other records" do
36
+ type = mock(AsciiDataTools::RecordType::Type, :field_names => ["field1", "field2"])
37
+ another_type = mock(AsciiDataTools::RecordType::Type, :field_names => ["field1", "field3"])
38
+
39
+ Record.new(type, ["abc", "def"]).should == Record.new(type, ["abc", "def"])
40
+ Record.new(type, ["abc", "def"]).should_not == Record.new(another_type, ["abc", "def"])
41
+ Record.new(type, ["abc", "def"]).should_not == Record.new(type, ["abc", "xyz"])
42
+ end
43
+
44
+ it "should raise an error when trying to access a non-existing field" do
45
+ lambda {Record.new(mock(AsciiDataTools::RecordType::Type, :field_names => ["existing"]), ["xyz"])['non-existing'] }.should raise_error(/non-existing.*does not exist/)
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,69 @@
1
+ require 'spec_helper'
2
+
3
+ module AsciiDataTools
4
+ module RecordType
5
+ module Builder
6
+ describe TypeBuilder do
7
+ include TypeBuilder
8
+ it "should create a type with the given name" do
9
+ build_type("ABC").name.should == "ABC"
10
+ end
11
+
12
+ it "should create a type which matches only for specific filenames (if given)" do
13
+ type = build_type("ABC", :applies_for_filenames_matching => /ABC/)
14
+ type.should be_able_to_decode(:ascii_string => "", :filename => "ABC.gz")
15
+ type.should_not be_able_to_decode(:ascii_string => "", :filename => "XYZ.gz")
16
+ end
17
+
18
+ context "for fixed-length types" do
19
+ before do
20
+ @record_type = build_type("ABC") do
21
+ field "RECORD_TYPE", :length => 3, :constrained_to => "ABC"
22
+ field "A_NUMBER", :length => 16, :constrained_to => /123/
23
+ field "RECORD_NUMBER", :length => 5, :normalised => true
24
+ field "END_OF_RECORD", :length => 1
25
+ end
26
+ end
27
+
28
+ it "should really build a fixed-length type" do
29
+ @record_type.should be_a(AsciiDataTools::RecordType::FixedLengthType)
30
+ end
31
+
32
+ it "should have the correct fields" do
33
+ @record_type.field_names.should == ["RECORD_TYPE", "A_NUMBER", "RECORD_NUMBER", "END_OF_RECORD"]
34
+ end
35
+
36
+ it "should have the correct length" do
37
+ @record_type.total_length_of_fields.should == 25
38
+ end
39
+
40
+ it "should have the correct constraints" do
41
+ @record_type.constraints_description.should == "RECORD_TYPE = ABC, A_NUMBER =~ /123/"
42
+ end
43
+
44
+ it "should normalise fields" do
45
+ @record_type.field_with_name("RECORD_NUMBER").should be_normalised
46
+ end
47
+ end
48
+
49
+ context "for csv types" do
50
+ before do
51
+ @record_type = build_type("ABC", :family => "csv", :divider => ";") do
52
+ field "RECORD_TYPE"
53
+ field "A_NUMBER"
54
+ field "RECORD_NUMBER"
55
+ end
56
+ end
57
+
58
+ it "should really build a csv type" do
59
+ @record_type.should be_a(AsciiDataTools::RecordType::CsvType)
60
+ end
61
+
62
+ it "should save the divider" do
63
+ @record_type.field_with_name(:divider).value.should == ";"
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,73 @@
1
+ require 'spec_helper'
2
+
3
+ module AsciiDataTools
4
+ module RecordType
5
+ module Decoder
6
+ describe RecordDecoder do
7
+ include RecordTypeHelpers
8
+ before do
9
+ @fields = [
10
+ make_field("field100", :length => 3),
11
+ make_field("field1", :length => 5),
12
+ make_field("field10", :length => 1)
13
+ ]
14
+ @type = Struct.new(:content_fields, :fields_by_type).new(@fields, {:meta => fields do field(:filename, :constrained_to => /abc/) end})
15
+ @type.extend(RecordTypeHelpers)
16
+ @type.extend(FixedLengthRecordDecoder)
17
+ end
18
+
19
+ it "should know whether a given string is decodable" do
20
+ @type.should be_able_to_decode(:ascii_string => "ABC12345\n")
21
+ end
22
+
23
+ it "should decode records correctly" do
24
+ @type.decode(:ascii_string => "XYZ12345\n").values.should == ["XYZ", "12345", "\n"]
25
+ end
26
+
27
+ it "should take into account constraints that are set on the fields" do
28
+ @type.content_fields[0].should_be_constrained_to("ABC")
29
+ @type.should be_able_to_decode(:ascii_string => "ABC12345\n")
30
+ @type.should_not be_able_to_decode(:ascii_string => "XYZ12345\n")
31
+ end
32
+
33
+ it "should not match ascii strings that don't have the same length as the individual fields" do
34
+ @type.should_not be_able_to_decode(:ascii_string => "ABC1234\n")
35
+ @type.should_not be_able_to_decode(:ascii_string => "ABC123456\n")
36
+ end
37
+
38
+ it "should not be able to decode if the filename field is not matching" do
39
+ @type.should be_able_to_decode(:ascii_string => "ABC12345\n")
40
+ @type.should be_able_to_decode(:ascii_string => "ABC12345\n", :filename => "abc")
41
+ @type.should_not be_able_to_decode(:ascii_string => "ABC12345\n", :filename => "def")
42
+ end
43
+ end
44
+
45
+ describe CsvRecordDecoder do
46
+ include RecordTypeHelpers
47
+ before do
48
+ @fields = [
49
+ make_field("field1"),
50
+ make_field("field2"),
51
+ make_field("field3")
52
+ ]
53
+ @type = Struct.new(:content_fields).new(@fields)
54
+ @type.stub!(:field_with_name).with(:divider).and_return(mock("divider field", :value => ";"))
55
+ @type.extend(CsvRecordDecoder)
56
+ end
57
+
58
+ it "should decode records correctly" do
59
+ @type.decode(:ascii_string => "X;Y;Z\n").values.should == ["X", "Y", "Z"]
60
+ end
61
+ end
62
+
63
+ describe UnknownRecordDecoder do
64
+ it "should decode the entire ascii string into the UNKNOWN field" do
65
+ decoder = Struct.new(:field_names).new(["UNKNOWN"])
66
+ decoder.extend(UnknownRecordDecoder)
67
+ record = decoder.decode(:ascii_string => "any string\n")
68
+ record["UNKNOWN"].should == "any string\n"
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,32 @@
1
+ require 'spec_helper'
2
+
3
+ module AsciiDataTools
4
+ module RecordType
5
+ module Encoder
6
+ describe FixedLengthRecordEncoder do
7
+ before do
8
+ @type = Object.new
9
+ @type.extend(FixedLengthRecordEncoder)
10
+ end
11
+
12
+ context "fixed length records" do
13
+ it "should pack the values of fixed length records back together" do
14
+ @type.encode(["123", "abc", "\n"]).should == "123abc\n"
15
+ end
16
+ end
17
+ end
18
+
19
+ describe CsvRecordEncoder do
20
+ before do
21
+ @type = Object.new
22
+ @type.stub!(:field_with_name).with(:divider).and_return(mock("divider field", :value => ";"))
23
+ @type.extend(CsvRecordEncoder)
24
+ end
25
+
26
+ it "should pack the values of csv records back together" do
27
+ @type.encode(["123", "abc", "XYZ"]).should == "123;abc;XYZ\n"
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,160 @@
1
+ require 'spec_helper'
2
+
3
+ module AsciiDataTools
4
+ module RecordType
5
+ module Field
6
+ describe Fields do
7
+ include RecordTypeHelpers
8
+
9
+ before do
10
+ @fields = fields do
11
+ field "field100"
12
+ field "field1"
13
+ field "field10"
14
+ end
15
+ end
16
+
17
+ it "should provide the field names" do
18
+ @fields.names.should == ["field100", "field1", "field10"]
19
+ end
20
+
21
+ it "should provide the number of content fields" do
22
+ @fields.number_of_content_fields.should == 3
23
+ end
24
+
25
+ it "should provide the length of the field with the longest name" do
26
+ @fields.length_of_longest_field_name.should == 8
27
+ end
28
+
29
+ it "should provide an empty constraints description when there are no constraints" do
30
+ @fields.constraints_description.should be_empty
31
+ end
32
+
33
+ it "should provide a list of comma-delimited field constraints as the constraints description" do
34
+ @fields.with_name("field100").should_be_constrained_to("ABC")
35
+ @fields.constraints_description.should == "field100 = ABC"
36
+
37
+ @fields.with_name("field10").should_be_constrained_to("DEF")
38
+ @fields.constraints_description.should == "field100 = ABC, field10 = DEF"
39
+ end
40
+
41
+ it "should support searching for specific fields" do
42
+ found_fields = @fields.fields_with {|field| field.name =~ /field\d{2,}/}
43
+ found_fields.should have(2).items
44
+ found_fields.with_name("field1").should be_nil
45
+ end
46
+
47
+ it "should allow mass normalisation" do
48
+ @fields.should_be_normalised
49
+ ["field100", "field1", "field10"].each do |field_name|
50
+ @fields.with_name(field_name).should be_normalised
51
+ end
52
+ end
53
+
54
+ it "should provide names of normalised fields" do
55
+ @fields.with_name("field1").should_be_normalised
56
+ @fields.with_name("field10").should_be_normalised
57
+ @fields.names_of_normalised_fields.should == "field1, field10"
58
+ end
59
+
60
+ it "should find fields by index" do
61
+ @fields.with_index(2).name.should == "field1"
62
+ end
63
+ end
64
+
65
+ describe Field do
66
+ it "should have a name" do
67
+ Field.new("name").name.should == "name"
68
+ end
69
+
70
+ context "a constrained field" do
71
+ before do
72
+ @field = Field.new("name")
73
+ @field.should_be_constrained_to("abc")
74
+ end
75
+
76
+ it "should provide a text description of the constraint" do
77
+ @field.constraint_description.should == "name = abc"
78
+ end
79
+
80
+ it "should validate input" do
81
+ @field.should be_a_valid_input("abc")
82
+ end
83
+
84
+ it "should invalidate input" do
85
+ @field.should_not be_a_valid_input("def")
86
+ end
87
+ end
88
+ end
89
+
90
+ describe FixedLengthField do
91
+ it "should have a name" do
92
+ FixedLengthField.new("name", nil).name.should == "name"
93
+ end
94
+
95
+ it "should contribute to the regexp string used for type matching" do
96
+ FixedLengthField.new(nil, 5).extend_regexp_string_for_matching("xxx").should == "xxx(.{5})"
97
+ end
98
+
99
+ it "should let the constraint contribute to the regexp string used for type matching if it is set" do
100
+ field = FixedLengthField.new(nil, 5)
101
+ field.constraint = mock("field constraint", :extend_regexp_string_for_matching => "xxxabc")
102
+ field.extend_regexp_string_for_matching("xxx").should == "xxxabc"
103
+ end
104
+ end
105
+
106
+ describe ConstantField do
107
+ it "should be able to store a value" do
108
+ field = ConstantField.new("XXX")
109
+ field.value = "abc"
110
+ field.value.should == "abc"
111
+ end
112
+ end
113
+
114
+ describe NoConstraint do
115
+ it "should always be satisfied" do
116
+ NoConstraint.new.should be_satisfied_by(Object.new)
117
+ end
118
+ end
119
+
120
+ describe FixedLengthConstraint do
121
+ it "should contribute to a regexp string by limiting the number of characters" do
122
+ FixedLengthConstraint.new(5).extend_regexp_string_for_matching('xxx').should == "xxx(.{5})"
123
+ end
124
+
125
+ it "should have an empty string representation" do
126
+ FixedLengthConstraint.new(5).to_s.should be_empty
127
+ end
128
+ end
129
+
130
+ describe OneOfConstraint do
131
+ it "should contribute to a regexp string that matches the type" do
132
+ OneOfConstraint.new("ABC", "DEF", "XYZ").extend_regexp_string_for_matching("xxx").should == "xxx(ABC|DEF|XYZ)"
133
+ end
134
+
135
+ it "should have the appropriate string representation when there is one possible value" do
136
+ OneOfConstraint.new("ABC").to_s.should == "= ABC"
137
+ end
138
+
139
+ it "should have the appropriate string representation when there is more than one value possible" do
140
+ OneOfConstraint.new(["ABC", "DEF"]).to_s.should == "one of ABC, DEF"
141
+ end
142
+ end
143
+
144
+ describe RegexpConstraint do
145
+ it "should contribute to a regexp string that matches the type" do
146
+ RegexpConstraint.new(/A\d{3}C/).extend_regexp_string_for_matching("xxx").should == "xxxA\\d{3}C"
147
+ end
148
+
149
+ it "should be satisfied when the string passed to it matches its regexp" do
150
+ RegexpConstraint.new(/ABC/).should be_satisfied_by("xyz.ABC.gz")
151
+ RegexpConstraint.new(/ABC/).should_not be_satisfied_by("xyz.UVW.gz")
152
+ end
153
+
154
+ it "should have an appropriate string representation" do
155
+ RegexpConstraint.new(/ABC/).to_s.should == "=~ /ABC/"
156
+ end
157
+ end
158
+ end
159
+ end
160
+ end