RubyGems - ascii-data-tools - Versions diffs - 0.9 - Mend

ascii-data-tools 0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

data/.gitignore +3 -0
data/.rvmrc +1 -0
data/.travis.yml +4 -0
data/Gemfile +3 -0
data/Gemfile.lock +40 -0
data/LICENSE.GPL2 +339 -0
data/README.rdoc +52 -0
data/Rakefile +42 -0
data/TODO +4 -0
data/ascii-data-tools.gemspec +30 -0
data/bin/ascii-data-cat +13 -0
data/bin/ascii-data-edit +13 -0
data/bin/ascii-data-norm +13 -0
data/bin/ascii-data-qdiff +13 -0
data/bin/ascii-data-tools-config +9 -0
data/examples/big +10000 -0
data/examples/built_in_records.gz +0 -0
data/examples/slightly_modified_built_in_records.gz +0 -0
data/features/ascii-data-cat.feature +110 -0
data/features/ascii-data-edit.feature +91 -0
data/features/ascii-data-qdiff.feature +54 -0
data/features/encoding_decoding.feature +68 -0
data/features/normaliser.feature +27 -0
data/features/plugins.feature +73 -0
data/features/record_recognition.feature +61 -0
data/features/step_definitions/ascii-data-cat_steps.rb +48 -0
data/features/step_definitions/ascii-data-edit_steps.rb +38 -0
data/features/step_definitions/ascii-data-norm_steps.rb +7 -0
data/features/step_definitions/ascii-data-qdiff_steps.rb +43 -0
data/features/step_definitions/encoding_decoding_steps.rb +23 -0
data/features/step_definitions/plugins_steps.rb +11 -0
data/features/step_definitions/record_recognition_steps.rb +10 -0
data/features/support/env.rb +5 -0
data/lib/ascii-data-tools.rb +8 -0
data/lib/ascii-data-tools/configuration.rb +169 -0
data/lib/ascii-data-tools/configuration_printer.rb +38 -0
data/lib/ascii-data-tools/controller.rb +123 -0
data/lib/ascii-data-tools/discover.rb +19 -0
data/lib/ascii-data-tools/external_programs.rb +23 -0
data/lib/ascii-data-tools/filter.rb +148 -0
data/lib/ascii-data-tools/filter/diffing.rb +139 -0
data/lib/ascii-data-tools/formatting.rb +109 -0
data/lib/ascii-data-tools/global_autodiscovery.rb +21 -0
data/lib/ascii-data-tools/record.rb +50 -0
data/lib/ascii-data-tools/record_type.rb +139 -0
data/lib/ascii-data-tools/record_type/builder.rb +50 -0
data/lib/ascii-data-tools/record_type/decoder.rb +77 -0
data/lib/ascii-data-tools/record_type/encoder.rb +17 -0
data/lib/ascii-data-tools/record_type/field.rb +168 -0
data/lib/ascii-data-tools/record_type/normaliser.rb +38 -0
data/lib/ascii-data-tools/ruby_extensions.rb +7 -0
data/lib/ascii-data-tools/version.rb +3 -0
data/spec/ascii-data-tools/configuration_printer_spec.rb +51 -0
data/spec/ascii-data-tools/configuration_spec.rb +153 -0
data/spec/ascii-data-tools/discover_spec.rb +8 -0
data/spec/ascii-data-tools/filter/diffing_spec.rb +82 -0
data/spec/ascii-data-tools/filter_spec.rb +107 -0
data/spec/ascii-data-tools/formatting_spec.rb +106 -0
data/spec/ascii-data-tools/record_spec.rb +49 -0
data/spec/ascii-data-tools/record_type/builder_spec.rb +69 -0
data/spec/ascii-data-tools/record_type/decoder_spec.rb +73 -0
data/spec/ascii-data-tools/record_type/encoder_spec.rb +32 -0
data/spec/ascii-data-tools/record_type/field_spec.rb +160 -0
data/spec/ascii-data-tools/record_type/normaliser_spec.rb +25 -0
data/spec/ascii-data-tools/record_type_spec.rb +175 -0
data/spec/filter_helper.rb +24 -0
data/spec/record_type_helpers.rb +8 -0
data/spec/spec.opts +2 -0
data/spec/spec_helper.rb +5 -0
metadata +196 -0

data/lib/ascii-data-tools/record_type/normaliser.rb ADDED

@@ -0,0 +1,38 @@
+module AsciiDataTools
+  module RecordType
+    module Normaliser
+      # Mixin that masks selected fields of an encoded record.
+      # The including object must respond to +fields+; every field must
+      # respond to +length+ and +normalised?+.
+      module Normaliser
+        # Returns +encoded_record+ with each normalised field overwritten by as
+        # many 'X' characters as the field is long. A record that does not match
+        # the layout regexp of a field is left untouched for that field.
+        def normalise(encoded_record)
+          regexps = (@regexps_to_normalise_fields ||= make_regexps_to_normalise_fields)
+          masked_record = encoded_record
+          fields_to_normalise.each do |field|
+            mask = 'X' * field.length
+            # Group 1 is the text before the field, group 3 the text after it.
+            masked_record = masked_record.gsub(regexps[field], "\\1#{mask}\\3")
+          end
+          masked_record
+        end
+        protected
+        # Builds the field => regexp lookup used by +normalise+.
+        def make_regexps_to_normalise_fields
+          regexps_by_field = {}
+          fields_to_normalise.each do |field|
+            regexps_by_field[field] = make_normalising_regexp_for(field)
+          end
+          regexps_by_field
+        end
+        # Memoised list of the fields whose +normalised?+ flag is set.
+        def fields_to_normalise
+          @fields_to_normalise ||= fields.select { |candidate| candidate.normalised? }
+        end
+        # Builds a three-group regexp: (fields before)(the field)(fields after),
+        # each part matching purely on field length.
+        def make_normalising_regexp_for(field)
+          position = fields.index(field)
+          leading_pattern = fields[0...position].map { |f| length_match_for(f) }.join
+          trailing_pattern = fields[(position + 1)..-1].map { |f| length_match_for(f) }.join
+          pattern = "^(#{leading_pattern})(#{length_match_for(field)})(#{trailing_pattern})$"
+          # MULTILINE lets '.' match newlines, which may be part of a record.
+          Regexp.new(pattern, Regexp::MULTILINE)
+        end
+        # Pattern fragment matching exactly +field.length+ arbitrary characters.
+        def length_match_for(field)
+          '.{' + field.length.to_s + '}'
+        end
+      end
+    end
+  end
+end

data/lib/ascii-data-tools/ruby_extensions.rb ADDED

@@ -0,0 +1,7 @@
+# Compatibility shim: Enumerable#enum_with_index exists on Ruby 1.8 but was
+# removed in 1.9. Define it wherever it is missing rather than sniffing the
+# version string, so the shim also applies to Ruby 2.x and later and cannot be
+# triggered by an unrelated version such as "2.1.9".
+unless Enumerable.method_defined?(:enum_with_index)
+  module Enumerable
+    # Returns an enumerator that yields each element together with its index.
+    def enum_with_index
+      map.with_index
+    end
+  end
+end

data/lib/ascii-data-tools/version.rb ADDED

@@ -0,0 +1,3 @@
+# Namespace of the ascii-data-tools gem.
+module AsciiDataTools
+  # Version string of this release.
+  VERSION = '0.9'
+end

data/spec/ascii-data-tools/configuration_printer_spec.rb ADDED

@@ -0,0 +1,51 @@
+require 'spec_helper'
+require 'ascii-data-tools/configuration_printer'
+module AsciiDataTools
+  # Checks that the printer output contains what the presenter supplies.
+  describe RecordTypesConfigurationPrinter do
+    before do
+      column_headings = ["type name", "total length", "constraints", "normalised fields"]
+      summary_rows = [["x", "y", "z", "w"], ["a", "b", "c", "d"]]
+      @presenter = mock(RecordTypesConfigurationPresenter,
+        :headings => column_headings,
+        :record_type_summaries => summary_rows
+      )
+    end
+    # Summary produced by a fresh printer wrapped around the mocked presenter.
+    def printed_summary
+      RecordTypesConfigurationPrinter.new(@presenter).summary
+    end
+    it "should print out the headers from the presenter" do
+      printed_summary.should include("type name", "total length", "constraints", "normalised fields")
+    end
+    it "should print out the record type summaries" do
+      printed_summary.should include("x", "y", "z", "w", "a", "b", "c", "d")
+    end
+  end
+  # Checks the headings and the per-record-type rows built by the presenter.
+  describe RecordTypesConfigurationPresenter do
+    include RecordTypeHelpers
+    # Builds a fresh presenter for +record_types+ and returns its summary rows.
+    def summaries_for(record_types)
+      RecordTypesConfigurationPresenter.new(record_types).record_type_summaries
+    end
+    it "should provide headings" do
+      expected_headings = ["type name", "total length", "constraints", "normalised fields"]
+      RecordTypesConfigurationPresenter.new(nil).headings.should == expected_headings
+    end
+    it "should present every record type as a row" do
+      record_types = [type("ABC"), type("DEF")]
+      summaries_for(record_types)[0].should == ["ABC", 0, "", ""]
+      summaries_for(record_types)[1].should == ["DEF", 0, "", ""]
+    end
+    it "should sort the record types by the total length" do
+      # Deliberately listed longest first so that sorting is observable.
+      record_types = [
+        type("longer") { field 'XYZ', :length => 5 },
+        type("shorter") { field 'ABC', :length => 3 }
+      ]
+      summaries_for(record_types)[0].first.should == "shorter"
+      summaries_for(record_types)[1].first.should == "longer"
+    end
+    it "should present the normalised fields" do
+      record_type = type("ABC") do
+        field 'yyy', :length => 1
+        field 'xxx', :length => 2, :normalised => true
+      end
+      summaries_for([record_type])[0].should == ["ABC", 3, "", "xxx"]
+    end
+  end
+end

data/spec/ascii-data-tools/configuration_spec.rb ADDED

@@ -0,0 +1,153 @@
+require 'spec_helper'
+require 'stringio'
+module AsciiDataTools
+  # Checks option overrides, input validation and record type loading.
+  describe Configuration do
+    # Configuration for +arguments+ with the record types overridden by a
+    # placeholder string.
+    def configuration_for(arguments)
+      Configuration.new(arguments, :record_types => "record types")
+    end
+    it "should allow overwriting the input source, output stream, record types and user feedback stream" do
+      input_source = mock("input source")
+      output_stream = mock("output stream")
+      overrides = {
+        :input_sources => [input_source],
+        :output_stream => output_stream,
+        :record_types => "record types",
+        :user_feedback_stream => "user feedback stream"
+      }
+      config = Configuration.new([], overrides)
+      config.output_stream.should == output_stream
+      config.input_sources.should == [input_source]
+      config.record_types.should == "record types"
+      config.user_feedback_stream.should == "user feedback stream"
+    end
+    it "should not be valid unless the input stream is specified" do
+      config = configuration_for([])
+      config.should_not be_valid
+      config.errors.should include("No input specified.")
+    end
+    it "should accept existing flat files as input" do
+      File.stub!(:exists?).with("path/to/file").and_return(true)
+      File.should_receive(:open).with("path/to/file").and_return(mock(IO))
+      configuration_for(["path/to/file"]).should be_valid
+    end
+    it "should reject non-existing flat files as input" do
+      File.stub!(:exists?).with("path/to/file").and_return(false)
+      config = configuration_for(["path/to/file"])
+      config.should_not be_valid
+      config.errors.should include("File path/to/file does not exist!")
+    end
+    it "should exit when passed invalid options" do
+      config = configuration_for(["-xxx"])
+      config.should_not be_valid
+      config.errors.should include("invalid option: -xxx")
+    end
+    it "should load record types using autodiscovery by default" do
+      AsciiDataTools.should_receive(:autodiscover).once
+      AsciiDataTools.stub!(:record_types).and_return("record types")
+      default_config = Configuration.new([])
+      default_config.record_types.should == "record types"
+    end
+    it "should use the override for record types if specified" do
+      AsciiDataTools.should_receive(:autodiscover).exactly(0).times
+      overridden_config = Configuration.new([], :record_types => "overriden record types")
+      overridden_config.record_types.should == "overriden record types"
+    end
+  end
+  # Checks how command-line input arguments are turned into input sources:
+  # "-" for STDIN, plain paths via File.open, "*.gz" paths via Zlib::GzipReader.
+  describe InputSourceFactory do
+    it "should use STDIN as the stream when - is the input argument" do
+      source_from(["-"]).stream.should == STDIN
+    end
+    it "should raise an error if the path specified in the input argument does not exist" do
+      lambda { source_from(["path/to/non-existent-file"]) }.should raise_error(/does not exist/)
+    end
+    it "should raise an error if the input parameters are empty" do
+      lambda { source_from([]) }.should raise_error(/No input specified/)
+    end
+    it "should raise an error if the wrong number of input parameters is specified" do
+      lambda { source_from(["x", "y"]) }.should raise_error(/2 input sources detected/i)
+    end
+    it "should process multiple input sources if so configured" do
+      File.stub!(:exists?).with("path/to/file1").and_return(true)
+      File.should_receive(:open).with("path/to/file1").and_return("IO stream 1")
+      File.stub!(:exists?).with("path/to/file2").and_return(true)
+      File.should_receive(:open).with("path/to/file2").and_return("IO stream 2")
+      factory = InputSourceFactory.new(:expected_argument_number => 2)
+      sources = factory.input_sources_from ["path/to/file1", "path/to/file2"]
+      sources[0].stream.should == "IO stream 1"
+      sources[1].stream.should == "IO stream 2"
+    end
+    it "should reject the input pipe as an argument if so configured" do
+      # Parenthesised so the regexp literal is not parsed ambiguously, matching
+      # the other raise_error expectations in this group.
+      lambda { InputSourceFactory.new(:input_pipe_accepted => false).input_sources_from(["-"]) }.should raise_error(/STDIN/)
+    end
+    it "should open the file normally if the path specified in the input argument exists and the file is not gzipped" do
+      File.stub!(:exists?).with("path/to/file").and_return(true)
+      File.should_receive(:open).with("path/to/file").and_return("IO stream")
+      source_from(["path/to/file"]).stream.should == "IO stream"
+    end
+    it "should open the file as a gzip read stream if the path specified in the input argument exists and the file is gzipped" do
+      File.stub!(:exists?).with("path/to/file.gz").and_return(true)
+      Zlib::GzipReader.should_receive(:open).with("path/to/file.gz").and_return("IO stream")
+      source_from(["path/to/file.gz"]).stream.should == "IO stream"
+    end
+    # Returns the first input source built from +args+ by a factory that
+    # expects exactly one argument and accepts the input pipe.
+    def source_from(args)
+      InputSourceFactory.new(:expected_argument_number => 1, :input_pipe_accepted => true).input_sources_from(args).first
+    end
+  end
+  # Checks that Editor hands file names to its block and tracks changes.
+  describe Editor do
+    it "should write input streams to files" do
+      collected_contents = ""
+      editor = Editor.new do |filenames|
+        filenames.each { |filename| collected_contents += File.read(filename) }
+        # Keep the concatenated text as the block's value.
+        collected_contents
+      end
+      ["file1 ", "file2 ", "file3"].each_with_index do |text, position|
+        editor[position] << text
+      end
+      editor.edit
+      collected_contents.should == "file1 file2 file3"
+    end
+    it "should detect when no changes were made during editing" do
+      editor = Editor.new { |filenames| }
+      editor[0] << "hello"
+      editor.edit
+      editor.changed?(0).should be_false
+    end
+    it "should detect when a change was made during editing" do
+      first_mtime = Time.new
+      # Two successive mtime lookups return different timestamps.
+      File.should_receive(:mtime).and_return(first_mtime, first_mtime + 1)
+      editor = Editor.new { |filenames| }
+      editor[0] << "hello"
+      editor.edit
+      editor.changed?(0).should be_true
+    end
+  end
+  # Checks line-by-line reading and the have_records predicate.
+  describe InputSource do
+    it "should read a line from the input stream when prompted to read and should know when it's full or empty" do
+      stream = StringIO.new("abc\ndef\n")
+      source = InputSource.new("some file", stream)
+      source.should have_records
+      ["abc\n", "def\n"].each do |expected_line|
+        source.read.should == expected_line
+      end
+      source.should_not have_records
+    end
+  end
+end

data/spec/ascii-data-tools/discover_spec.rb ADDED

@@ -0,0 +1,8 @@
+require 'spec_helper'
+# After requiring 'ascii-data-tools/discover' the EXAMPLE01 record type is
+# expected to be findable by name.
+describe "the default configuration" do
+  it "should add the EXAMPLE01 type to the configuration" do
+    # Required inside the example on purpose: loading the file is the action under test.
+    require 'ascii-data-tools/discover'
+    example_type = AsciiDataTools.record_types.find_by_name("EXAMPLE01")
+    example_type.should_not be_nil
+  end
+end

data/spec/ascii-data-tools/filter/diffing_spec.rb ADDED

@@ -0,0 +1,82 @@
+require 'spec_helper'
+require 'filter_helper'
+require 'ascii-data-tools/configuration'
+require 'ascii-data-tools/filter'
+require 'stringio'
+module AsciiDataTools
+  module Filter
+    module Diffing
+      # Checks the filter that diffs two upstream input sources.
+      describe DiffExecutingFilter do
+        it "should return the diff if the inputs are not the same" do
+          left_source = input_source_containing("abc\ndef\n")
+          right_source = input_source_containing("abc\ndef\nxyz\n")
+          should output("2a3\n> xyz\n").from_upstream([left_source, right_source])
+        end
+        it "should raise an exception when the streams are the same" do
+          identical_sources = [input_source_containing("abc\ndef\n"), input_source_containing("abc\ndef\n")]
+          filter = DiffExecutingFilter.new
+          filter << identical_sources
+          lambda { filter.write(StringIO.new) }.should raise_error(StreamsEqualException)
+        end
+      end
+      # Checks that output in the normal `diff` format is parsed into Difference
+      # objects for changes ("c"), additions ("a") and deletions ("d").
+      describe DiffParsingFilter do
+        # Returns a DiffParsingFilter whose upstream contains +diff_output+.
+        def parsing_filter_for(diff_output)
+          filter = DiffParsingFilter.new
+          filter << input_source_containing(diff_output)
+          filter
+        end
+        # Asserts that +diff_output+ parses into exactly one Difference.
+        def expect_single_difference_from(diff_output)
+          filter = parsing_filter_for(diff_output)
+          filter.read.should be_a(Difference)
+          filter.should_not have_records
+        end
+        it "should sieve the diffs into left and right lines" do
+          difference = parsing_filter_for("4c4,5\n< abc\n---\n> def\n> ghi\n").read
+          difference.left_contents.should == ["abc\n"]
+          difference.right_contents.should == ["def\n", "ghi\n"]
+        end
+        context "for conflicts" do
+          it "should detect a one-line difference" do
+            expect_single_difference_from("4c4\n< abc\n---\n> def\n")
+          end
+          it "should detect a multi-line difference" do
+            expect_single_difference_from("1,2c1,3\n< abc\n< def\n---\n> ghi\n> jkl\n> mno\n")
+          end
+        end
+        context "for additions" do
+          it "should detect a one-line difference" do
+            expect_single_difference_from("1a2\n> def\n")
+          end
+          it "should detect a multi-line difference" do
+            expect_single_difference_from("1a2,3\n> def\n> xyz\n")
+          end
+        end
+        context "for deletions" do
+          it "should detect a one-line difference" do
+            expect_single_difference_from("1d2\n< def\n")
+          end
+          it "should detect a multi-line difference" do
+            # The stray backslash before the last "<" was removed; the string
+            # bytes are the same.
+            expect_single_difference_from("1,3d2\n< def\n< xyz\n< wuv\n")
+          end
+        end
+      end
+    end
+  end
+end

data/spec/ascii-data-tools/filter_spec.rb ADDED

@@ -0,0 +1,107 @@
+require 'spec_helper'
+require 'filter_helper'
+require 'ascii-data-tools/configuration'
+require 'ascii-data-tools/filter'
+require 'ascii-data-tools/record_type'
+require 'stringio'
+module AsciiDataTools
+  module Filter
+    # Checks the basic block-based filter: reading, writing and chaining.
+    describe Filter do
+      it "should read from 'upstream' and filter when reading" do
+        reversing_filter = Filter.new { |record| record.strip.reverse + "\n" }
+        reversing_filter << mock("upstream object", :read => "abc\n")
+        reversing_filter.read.should == "cba\n"
+      end
+      it "should read from upstream and write to given output" do
+        reversing_filter = Filter.new { |record| record.strip.reverse + "\n" }
+        reversing_filter.should output("cba\nfed\n").from_upstream("abc\ndef\n")
+      end
+      it "should be chainable" do
+        digit_masker = Filter.new { |record| record.gsub(/\d/, "X") }
+        x_counter = Filter.new { |record| record.count("X").to_s }
+        pass_through = Filter.new { |record| record }
+        # Wire the chain from the source outwards: source -> masker -> counter -> pass-through.
+        digit_masker << input_source_containing("ab1cd2")
+        x_counter << digit_masker
+        pass_through << x_counter
+        pass_through.read.should == "2"
+      end
+    end
+    # Checks the filter whose block receives the buffered upstream.
+    describe BufferingFilter do
+      it "should buffer the upstream into a tempfile before the first read and then return it" do
+        identity_filter = BufferingFilter.new { |buffered_upstream_as_tempfile| buffered_upstream_as_tempfile }
+        identity_filter.should output("abc\ndef\n").from_upstream("abc\ndef\n")
+      end
+      it "should be chainable" do
+        upcasing_filter = BufferingFilter.new do |tempfile|
+          StringIO.new(tempfile.readlines.map { |line| line.upcase }.join(""))
+        end
+        suffixing_filter = BufferingFilter.new do |tempfile|
+          StringIO.new(tempfile.readlines.map { |line| "#{line.strip}n\n" }.join(""))
+        end
+        suffixing_filter.should output("ABCn\nDEFn\n").from_upstream(upcasing_filter, "abc\ndef\n")
+      end
+    end
+    # Checks that SortingFilter emits its upstream lines in sorted order.
+    describe SortingFilter do
+      it "should sort the given stream" do
+        # Bare `should` has no explicit receiver; presumably it targets the
+        # implicit subject of this group - TODO confirm for the RSpec version in use.
+        should output("abc\ndef\nxyz\n").from_upstream("xyz\nabc\ndef\n")
+      end
+    end
+    # Fixture: one decoded record of type "ABC" with three fields. The heredoc
+    # behaves like a double-quoted string, so the last value is the two
+    # characters backslash + "n", not a newline.
+    DECODED_FIXED_LENGTH_RECORD = <<STR
+Record 01 (ABC)
+01 field1  : [12345]-----
+02 field10 : [abc]-------
+03 field3  : [\\n]--------
+STR
+    # Fixture: two consecutive decoded records labelled "unknown", each with a
+    # single UNKNOWN field.
+    SEVERAL_FIXED_LENGTH_RECORDS = <<STR
+Record 01 (unknown)
+01 UNKNOWN  : [12345]-----
+Record 02 (unknown)
+01 UNKNOWN  : [abc]-----
+STR
+    # Checks that decoded (pretty-printed) records read from upstream are
+    # turned back into Record objects carrying the bracketed values.
+    describe ParsingFilter do
+      include RecordTypeHelpers
+      include AsciiDataTools::Record
+      include AsciiDataTools::RecordType
+      it "should identify a decoded record and encode it" do
+        type = type("ABC") do
+          field 'field1',  :length => 5
+          # Name corrected from 'filed10' to match the label in DECODED_FIXED_LENGTH_RECORD.
+          field 'field10', :length => 3
+          field 'field3',  :length => 1
+        end
+        record_types = mock(AsciiDataTools::RecordType::RecordTypeRepository)
+        record_types.should_receive(:find_by_name).with("ABC").and_return(type)
+        filter = ParsingFilter.new(record_types)
+        filter << input_source_containing(DECODED_FIXED_LENGTH_RECORD)
+        filter.read.should == AsciiDataTools::Record::Record.new(type, ["12345", "abc", "\n"])
+      end
+      # Renamed: this example previously shared the description of the one above.
+      it "should identify several decoded records of unknown type and encode them" do
+        type = AsciiDataTools::RecordType::UnknownType.new
+        record_types = mock(AsciiDataTools::RecordType::RecordTypeRepository)
+        # One lookup per record in the fixture.
+        record_types.should_receive(:find_by_name).with("unknown").twice.and_return(type)
+        filter = ParsingFilter.new(record_types)
+        filter << input_source_containing(SEVERAL_FIXED_LENGTH_RECORDS)
+        filter.read.should == AsciiDataTools::Record::Record.new(type, ["12345"])
+        filter.read.should == AsciiDataTools::Record::Record.new(type, ["abc"])
+      end
+    end
+  end
+end