RubyGems - mdarray-jcsv - Versions diffs - 0.6.3-java - Mend

mdarray-jcsv 0.6.3-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

checksums.yaml +7 -0
data/LICENSE.txt +23 -0
data/README.md +2 -0
data/Rakefile +46 -0
data/config.rb +104 -0
data/lib/constraints.rb +205 -0
data/lib/date_filters.rb +252 -0
data/lib/dimensions.rb +276 -0
data/lib/filters.rb +332 -0
data/lib/jcsv.rb +107 -0
data/lib/list_reader.rb +200 -0
data/lib/locale.rb +192 -0
data/lib/map_reader.rb +192 -0
data/lib/mdarray-jcsv.rb +24 -0
data/lib/mdarray_reader.rb +110 -0
data/lib/numeric_filters.rb +225 -0
data/lib/reader.rb +547 -0
data/lib/supercsv_interface.rb +231 -0
data/test/test_complete.rb +37 -0
data/test/test_critbit.rb +442 -0
data/test/test_customer_list.rb +436 -0
data/test/test_customer_map.rb +209 -0
data/test/test_customer_nhlist.rb +161 -0
data/test/test_deep_map.rb +264 -0
data/test/test_del.rb +73 -0
data/test/test_dimensions.rb +231 -0
data/test/test_example.rb +79 -0
data/test/test_filters.rb +374 -0
data/test/test_list_dimensions.rb +110 -0
data/test/test_mdarray.rb +227 -0
data/test/test_missing_data.rb +57 -0
data/vendor/commons-beanutils-1.8.3.jar +0 -0
data/vendor/commons-lang3-3.1.jar +0 -0
data/vendor/dozer-5.4.0.jar +0 -0
data/vendor/jcl-over-slf4j-1.6.6.jar +0 -0
data/vendor/joda-time-2.7.jar +0 -0
data/vendor/slf4j-api-1.7.5.jar +0 -0
data/vendor/snakeyaml-1.14.jar +0 -0
data/vendor/super-csv-2.4.0.jar +0 -0
data/vendor/super-csv-dozer-2.4.0.jar +0 -0
data/vendor/super-csv-java8-2.4.0.jar +0 -0
data/vendor/super-csv-joda-2.4.0.jar +0 -0
data/version.rb +2 -0
metadata +196 -0

data/test/test_customer_nhlist.rb ADDED

@@ -0,0 +1,161 @@
+# -*- coding: utf-8 -*-
+##########################################################################################
+# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
+# and distribute this software and its documentation for educational, research, and
+# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
+# granted, provided that the above copyright notice, this paragraph and the following two
+# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
+# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
+#
+# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
+# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
+# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+# OR MODIFICATIONS.
+##########################################################################################
+require 'rubygems'
+require 'test/unit'
+require 'shoulda'
+require_relative '../config'
+require 'jcsv'
+class CSVTest < Test::Unit::TestCase
+  context "CSV test" do
+    setup do
+    end
+    #-------------------------------------------------------------------------------------
+    #
+    #-------------------------------------------------------------------------------------
+    should "parse a csv file the quick way without headers" do
+      # Setting headers to false, will read the header as a normal line
+      reader = Jcsv.reader("../data/customer_nh.csv", headers: false)
+      # read the whole file in one piece.
+      content = reader.read
+      # p content
+      assert_equal(["1", "John", "Dunbar", "13/06/1945",
+                    "1600 Amphitheatre Parkway\nMountain View, CA 94043\nUnited States",
+                    nil, nil, "\"May the Force be with you.\" - Star Wars",
+                    "jdunbar@gmail.com", "0"], content[0])
+    end
+    #-------------------------------------------------------------------------------------
+    #
+    #-------------------------------------------------------------------------------------
+    should "process headerless files with filters" do
+      # Setting headers to false, will read the header as a normal line
+      reader = Jcsv.reader("../data/customer_nh.csv", headers: false)
+      # Filters need to match the column by position, since there is no header to allow
+      # matching by names.  Columns indexed after the last filter will not be filtered
+      # in any way.  In the example bellow, no filter will be applied on column 5 and
+      # after
+      reader.filters = [Jcsv.optional >> Jcsv.int, Jcsv.not_nil, Jcsv.not_nil,
+                        Jcsv.optional >> Jcsv.date("dd/MM/yyyy")]
+      # read the whole file in one piece.
+      content = reader.read
+      assert_equal(1, content[0][0])
+      assert_equal(DateTime.parse("13/06/1945"), content[0][3])
+    end
+    #-------------------------------------------------------------------------------------
+    #
+    #-------------------------------------------------------------------------------------
+    should "allow adding custom headers to headerless files" do
+      # Setting headers to false, will read the header as a normal line
+      reader = Jcsv.reader("../data/customer_nh.csv", headers: false,
+                           custom_headers:
+                             ["customerNo", "firstName", "lastName", "birthDate",
+                              "mailingAddress", "married", "numberOfKids",
+                              "favouriteQuote", "email", "loyaltyPoints"])
+      # Add filters, so that we get 'objects' instead of strings for filtered fields
+      reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
+                        :married => Jcsv.optional >> Jcsv.bool,
+                        :customer_no => Jcsv.int,
+                        :birth_date => Jcsv.date("dd/MM/yyyy")}
+      reader.read do |line_no, row_no, row, headers|
+        # First field is customer number, which is converted to int
+        assert_equal(1, row[0]) if row_no == 1
+        assert_equal("John", row[1]) if row_no == 1
+        # Field 5 is :married.  It is optional, so leaving it blank (nil) is ok.
+        assert_equal(nil, row[5]) if row_no == 1
+        # notice that field married that was "Y" is now true. Number of kids is not "0",
+        # but 0, customerNo is also and int
+        assert_equal(true, row[5]) if row_no == 2
+      end
+    end
+    #-------------------------------------------------------------------------------------
+    #
+    #-------------------------------------------------------------------------------------
+    should "Read headerless files with map if given custom_headers" do
+      # Setting headers to false, will read the header as a normal line
+      reader = Jcsv.reader("../data/customer_nh.csv", headers: false, format: :map,
+                           custom_headers:
+                             ["customerNo", "firstName", "lastName", "birthDate",
+                              "mailingAddress", "married", "numberOfKids",
+                              "favouriteQuote", "email", "loyaltyPoints"],
+                           default_filter: Jcsv.not_nil)
+      # Set numberOfKids and married as optional, otherwise an exception will be raised
+      reader.filters = {:number_of_kids => Jcsv.optional >> Jcsv.int,
+                        :married => Jcsv.optional >> Jcsv.bool,
+                        :loyalty_points => Jcsv.long,
+                        :customer_no => Jcsv.int,
+                        :birth_date => Jcsv.date("dd/MM/yyyy")}
+      # When parsing to map, it is possible to make a mapping. If column name is :false
+      # the column will be removed from the returned row
+      reader.mapping = {:number_of_kids => :numero_criancas,
+                        :married => "casado",
+                        :loyalty_points => "pontos fidelidade",
+                        :customer_no => false}
+      reader.read do |line_no, row_no, row, headers|
+        if (row_no == 5)
+          assert_equal(nil, row[:customer_no])
+          assert_equal("Bill", row[:first_name])
+          assert_equal(true, row["casado"])
+          assert_equal("1973-07-10T00:00:00+00:00", row[:birth_date].to_s)
+          assert_equal("2701 San Tomas Expressway\nSanta Clara, CA 95050\nUnited States",
+                       row[:mailing_address])
+          assert_equal(3, row[:numero_criancas])
+        end
+      end
+    end
+  end
+end

data/test/test_deep_map.rb ADDED

@@ -0,0 +1,264 @@
+# -*- coding: utf-8 -*-
+##########################################################################################
+# Copyright © 2015 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
+# and distribute this software and its documentation for educational, research, and
+# not-for-profit purposes, without fee and without a signed licensing agreement, is hereby
+# granted, provided that the above copyright notice, this paragraph and the following two
+# paragraphs appear in all copies, modifications, and distributions. Contact Rodrigo
+# Botafogo - rodrigo.a.botafogo@gmail.com for commercial licensing opportunities.
+#
+# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
+# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
+# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
+# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
+# OR MODIFICATIONS.
+##########################################################################################
+require 'rubygems'
+require 'test/unit'
+require 'shoulda'
+require 'matrix'
+require_relative '../config'
+require 'jcsv'
+class CSVTest < Test::Unit::TestCase
+  context "CSV test" do
+    setup do
+    end
+    #-------------------------------------------------------------------------------------
+    # When reading the CSV file in one big chunk and selecting deep_map: true, then each
+    # dimension will be hashed across all rows.  [This is not clear at all!!! IMPROVE.]
+    #-------------------------------------------------------------------------------------
+    should "parse multi-dimension csv file to map, chuk_size all and deep_map true" do
+      reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
+                           dimensions: [:treatment, :subject, :period], deep_map: true)
+      # remove the :patient field from the data, as this field is already given by the
+      # :subject field.
+      reader.mapping = {:patient => false}
+      # since we are reading with chunk_size = :all, then we will only get one chunk back.
+      # Then we can get the first chunk by indexing read with 0: reader.read[0]
+      treatment = reader.read[0]
+      # p treatment
+      # get the dimensions
+      treatment_type = reader.dimensions[:treatment]
+      subject = reader.dimensions[:subject]
+      period = reader.dimensions[:period]
+      # variable labels has all dimension labels
+      assert_equal(0, treatment_type.labels["placebo"])
+      assert_equal(1, treatment_type.labels["Progabide"])
+      assert_equal(1, subject.labels["2"])
+      assert_equal(13, subject.labels["14"])
+      assert_equal(58, subject.labels["59"])
+      assert_equal(0, period.labels["1"])
+      assert_equal(3, period.labels["4"])
+      assert_equal("14", treatment["placebo"]["10"]["1"][:"seizure.rate"])
+    end
+    #-------------------------------------------------------------------------------------
+    #
+    #-------------------------------------------------------------------------------------
+    should "read data with dimensions, mapping and filters" do
+      reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
+                           dimensions: [:treatment, :subject, :period], deep_map: true,
+                           default_filter: Jcsv.int)
+      # remove the :patient field from the data, as this field is already given by the
+      # :subject field.
+      reader.mapping = {:patient => false}
+      reader.filters = {:"seizure.rate" => Jcsv.float}
+      # will raise an exception as :period is not a key.  Will break as soon as we read the
+      # first period for the second user
+      treatment = reader.read[0]
+      # p treatment
+      assert_equal(14.0, treatment["placebo"]["10"]["1"][:"seizure.rate"])
+      assert_equal(19.0, treatment["Progabide"]["45"]["1"][:"seizure.rate"])
+    end
+    #-------------------------------------------------------------------------------------
+    #
+    #-------------------------------------------------------------------------------------
+    should "read data with deep_map but chunk_size not all" do
+      reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
+                           dimensions: [:treatment, :subject, :period], deep_map: true,
+                           default_filter: Jcsv.int)
+      # remove the :patient field from the data, as this field is already given by the
+      # :subject field.
+      reader.mapping = {:patient => false}
+      reader.filters = {:"seizure.rate" => Jcsv.float}
+      # will raise an exception as :period is not a key.  Will break as soon as we read the
+      # first period for the second user
+      treatment = reader.read
+      assert_equal(3.0, treatment[0]["placebo"]["2"]["1"][:"seizure.rate"])
+      # since only 20 rows read per chunk, there is no Progabide row yet. Note that there
+      # was data in the test above
+      assert_equal(nil, treatment[0]["Progabide"])
+      # chunk 10, has Progabide as a dimension
+      assert_equal(6.0, treatment[10]["Progabide"]["51"]["2"][:"seizure.rate"])
+    end
+    #-------------------------------------------------------------------------------------
+    #
+    #-------------------------------------------------------------------------------------
+    should "raise exception if key is repeated" do
+      reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
+                           dimensions: [:period], deep_map: true)
+      # will raise an exception as :period is not a key.  Will break as soon as we read the
+      # first period for the second user
+      assert_raise ( Jcsv::DuplicateKeyError ) { reader.read[0] }
+    end
+    #-------------------------------------------------------------------------------------
+    # When reading the CSV file in one big chunk and selecting deep_map: true, then each
+    # dimension will be hashed across all rows.  [This is not clear at all!!! IMPROVE.]
+    #-------------------------------------------------------------------------------------
+    should "Show errors when dimensions are not in order or missing" do
+      reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
+                           dimensions: [:period, :treatment, :subject], deep_map: true)
+      p "LOTS OF ERROR MESSAGES EXPECTED FROM HERE..."
+      # remove the :patient field from the data, as this field is already given by the
+      # :subject field.
+      reader.mapping = {:patient => false}
+      # since we are reading with chunk_size = :all, then we will only get one chunk back.
+      # Then we can get the first chunk by indexing read with 0: reader.read[0]
+      treatment = reader.read[0]
+      p "... TO HERE.  If no error messages, then something is wrong!"
+    end
+    #-------------------------------------------------------------------------------------
+    # When reading the CSV file in one big chunk and selecting deep_map: true, then each
+    # dimension will be hashed across all rows.  [This is not clear at all!!! IMPROVE.]
+    #-------------------------------------------------------------------------------------
+    should "Suppress warnings when dimensions are not in order or missing" do
+      reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: :all,
+                           dimensions: [:period, :treatment, :subject], deep_map: true,
+                           suppress_warnings: true)
+      p "No warning messages should be seen from here..."
+      # remove the :patient field from the data, as this field is already given by the
+      # :subject field.
+      reader.mapping = {:patient => false}
+      # since we are reading with chunk_size = :all, then we will only get one chunk back.
+      # Then we can get the first chunk by indexing read with 0: reader.read[0]
+      treatment = reader.read
+      # p treatment
+      p "... to here.  If there are any warning messages then there is something wrong!"
+    end
+    #-------------------------------------------------------------------------------------
+    # There is a large difference when parsing multidimensional CSV files with chunks and
+    # no chunks.  When no chunks are selected, this is identical to normal dimension
+    # reading.
+    #-------------------------------------------------------------------------------------
+    should "parse multi-dimension csv file to map no chunk" do
+      reader = Jcsv.reader("../data/epilepsy.csv", format: :map,
+                           dimensions: [:treatment, :subject, :period], deep_map: true)
+      # remove the :patient field from the data, as this field is already given by the
+      # :subject field.
+      reader.mapping = {:patient => false}
+      # since we are reading with chunk_size = :all, then we will only get one chunk back.
+      # Then we can get the first chunk by indexing read with 0: reader.read[0]
+      treatment = reader.read
+      # p treatment
+      assert_equal("11", treatment["placebo.1.1"][:base])
+      assert_equal("31", treatment["placebo.1.1"][:age])
+      assert_equal("5", treatment["placebo.1.1"][:"seizure.rate"])
+      assert_equal("11", treatment["placebo.1.2"][:base])
+      assert_equal("31", treatment["placebo.1.2"][:age])
+      assert_equal("3", treatment["placebo.1.2"][:"seizure.rate"])
+    end
+    #-------------------------------------------------------------------------------------
+    # All examples until now had chunk_size :all, but they can have smaller size.  In this
+    # example, chunk_size is 20 and it is processed by a block
+    #-------------------------------------------------------------------------------------
+    should "read with dimension and given a block" do
+      reader = Jcsv.reader("../data/epilepsy.csv", format: :map, chunk_size: 20,
+                           dimensions: [:treatment, :subject, :period], deep_map: true,
+                           default_filter: Jcsv.int)
+      reader.mapping = {:patient => false}
+      reader.read do |line_no, row_no, chunk|
+        p line_no
+        p row_no
+        p chunk
+      end
+    end
+=begin
+    #-------------------------------------------------------------------------------------
+    #
+    #-------------------------------------------------------------------------------------
+    should "read dimensions to lists" do
+      reader = Jcsv.reader("epilepsy.csv", chunk_size: :all, deep_map: true,
+                           dimensions: [:treatment, :subject, :period])
+      table = reader.read
+      # p table
+    end
+=end
+  end
+end

data/test/test_del.rb ADDED

@@ -0,0 +1,73 @@
+hash = {}
+key = "placebo.john.1"
+key.split('.').reduce(hash) { |h,m| h[m] ||= {} }
+*key, last = key.split(".")
+key.inject(hash, :fetch)[last] = {a: 1, b:2, c: 3}
+key = "placebo.john.2"
+key.split('.').reduce(hash) { |h,m| h[m] ||= {} }
+*key, last = key.split(".")
+key.inject(hash, :fetch)[last] = {a: 10, b:20, c: 30}
+puts hash #=> {"one"=>{"two"=>{"three"=>{}}}}
+p hash["placebo"]["john"]["2"]
+=begin
+require 'hashie'
+cl = Hashie::Clash.new
+cl.placebo!.john!.p1(a: 1, b: 2, c: 3)
+#cl.placebo!.john!.p2(a: 10, b: 20, c: 30)
+p cl
+=end
+rh = Hash.new {|h,k| h[k] = Hash.new(&h.default_proc) }
+h = Hash.new
+=begin
+h["placebo"] ||= Hash.new
+h["med"] ||= Hash.new
+h["placebo"]["john"] ||= Hash.new
+h["placebo"]["john"][1] ||= Hash.new
+h["placebo"] ||= Hash.new
+h["placebo"]["john"] ||= Hash.new
+h["placebo"]["john"][2] ||= Hash.new
+h["placebo"]["john"][1] = {a: 1, b: 2, c: 3}
+h["placebo"]["john"][2] = {a: 2, b: 10, c: 50}
+p h["placebo"]
+=end
+=begin
+h["placebo"] ||= Hash.new
+h["placebo"]["john"] ||= Hash.new
+h["placebo"]["john"]["1"] ||= Hash.new
+key = "placebo.john.1"
+*key, last = key.split(".")
+key.inject(h, :fetch)[last] = {a: 1, b:2, c: 3}
+h["placebo"] ||= Hash.new
+h["placebo"]["john"] ||= Hash.new
+h["placebo"]["john"]["2"] ||= Hash.new
+key = "placebo.john.2"
+*key, last = key.split(".")
+key.inject(h, :fetch)[last] = {a: 10, b:20, c: 30}
+p h["placebo"]["john"]["2"]
+=end