RubyGems - quandl_format - Versions diffs - 0.1.7 → 0.1.8 - Mend

quandl_format 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

checksums.yaml +4 -4
data/UPGRADE.md +5 -0
data/lib/quandl/format/dataset/load.rb +98 -67
data/lib/quandl/format/dataset.rb +10 -2
data/lib/quandl/format/version.rb +1 -1
data/spec/fixtures/data/unknown_attribute.qdf +1 -1
data/spec/fixtures/data/valid.qdf +2 -2
data/spec/lib/quandl/format/dataset/load/errors_spec.rb +2 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 5f454dd17f3f3d71afa6ecda800df054fcc52f27
-  data.tar.gz: 04b4dab5f31e57b2f7c6e6af810e952fe25524ae
+  metadata.gz: e3c22439813d6d95d978d4f5f1208445e4d8f25d
+  data.tar.gz: 89860b891f3076468f853d3151f50a6c48b5a8ca
 SHA512:
-  metadata.gz: 75af0ccda893f85cc46719d1fadf2ddfa65b6abef92075ecddcc098ba4f7c1ed7ea1ba5b031223f523921293639b5d313ace67ec55a28bf0c132bad20f11d78e
-  data.tar.gz: cb56897d0c1ec4b625708a85917ecc88efba46d6576d4576812679822b466d436736542944890303d44741f1f04f502da709adce8ecbcff45b0a9509326b8a2f
+  metadata.gz: 425e8f296e868a0f2978ed42aacb0848974c382f340598988fafaabd101e1c794645566becf871060e331a552db771c211646711ea6cc3d6a3f680ce97456846
+  data.tar.gz: eed88eb2e36fa32feab9e70f0e52d929af540e127cea59610433319f6e4ede46445bc53dbf4b4c90995caff9e149e7a2377a8b5c3352f1c4a9bd0b67a7f2ce8d

data/UPGRADE.md CHANGED Viewed

@@ -1,3 +1,8 @@
+## 0.1.8
+* Add Quandl::Format::Dataset.each_line to upload datasets line by line as they become available from whichever interface. This allows the Ruby GC to operate and avoids having 20k datasets in memory ...
 ## 0.1.7
 * Accept tabular data

data/lib/quandl/format/dataset/load.rb CHANGED Viewed

@@ -1,80 +1,113 @@
 class Quandl::Format::Dataset::Load
-  SECTION_DELIMITER = '-'
+  SYNTAX = {
+    comment:          '#',
+    data:             '-',
+    attribute:        /^([a-z0-9_]+): (.+)/,
+  }
   class << self
+    def each_in_file(path, &block)
+      each_line( File.open(path, "r"), &block )
+    end
+    def each_line(interface, &block)
+      node = new_node
+      # for each file line
+      interface.each_line do |line|
+        # process line
+        node = process_line(line, node, &block)
+      end
+      process_tail(node, &block)
+    end
     def file(path)
-      string(File.read(path).strip)
+      string( File.read(path) )
     end
     def string(input)
-      nodes = parse_string(input)
-      nodes = parse_yaml_and_csv(nodes)
-      nodes = nodes_to_datasets(nodes)
-      nodes
+      # prepare to collect all datasets
+      datasets = []
+      # initialize blank node
+      node = new_node
+      # for each line
+      input.each_line do |line|
+        # process each line; whenever a dataset is completed, append it to datasets
+        node = process_line( line, node ){|d| datasets << d }
+      end
+      # signify end
+      process_tail(node){|d| datasets << d }
+      # return datasets
+      datasets
     end
-    protected
+    def new_node(line=0)
+      { line: line, section: :attributes, data: '', attributes: '', data_line: 0 }
+    end
-    def parse_string(input)
-      nodes = []
-      section_type = :data
-      line_index = 0
-      input.each_line do |rline|
-        # track current line index
-        line_index += 1
-        # strip whitespace
-        line = rline.strip.rstrip
-        # ignore comments and blank lines
-        next if line[0] == '#' || line.blank?
-        # are we looking at an attribute?
-        if line =~ attribute_format
-          # if we are leaving the data section
-          # then this is the start of a new node
-          nodes << { attributes: '', data: '', line: line_index } if section_type == :data
-          # update the section to attributes
-          section_type = :attributes
-          # have we reached the end of the attributes?
-        elsif line == '-'
-          # update the section to data
-          nodes[-1][:data_line] = line_index + 1
-          section_type = :data
-          # skip to the next line
-          next
+    def process_tail(node, &block)
+      # signify end
+      process_line('-', node, &block)
+      process_line('tail: end', node, &block)
+    end
+    def process_line(rline, node, &block)
+      # increment node line
+      node[:line] += 1
+      # strip whitespace
+      line = rline.strip.rstrip
+      # skip comments and blank lines
+      return node if line[0] == SYNTAX[:comment] || line.blank?
+      # looking at an attribute?
+      if line =~ SYNTAX[:attribute]
+        # exiting data section?
+        if node[:section] == :data
+          # we've reached the end of a node
+          # send it to the server
+          process_node(node, &block)
+          # start a new node while retaining the current line number
+          node = new_node( node[:line] )
         end
-        # add the line to it's section in the current node.
-        # YAML must include whitespace
-        nodes[-1][section_type] += (section_type == :data) ? "#{line}\n" : rline
+        # update the node's section
+        node[:section] = :attributes
+      # entering the data section?
+      elsif line[0] == SYNTAX[:data]
+        # update the node
+        node[:data_line] = node[:line] + 1
+        node[:section] = :data
+        # skip to the next line
+        return node
       end
-      nodes
+      # append the line to the requested section
+      node[ node[:section] ] += ( node[:section] == :data ) ? "#{line}\n" : rline
+      # return the updated node
+      node
     end
-    def parse_yaml_and_csv(nodes)
-      output = []
-      nodes.each do |node|
-        # parse attrs as yaml
-        node[:attributes] = parse_yaml_attributes(node)
-        # we cant continue unless attributes are present
-        next if node[:attributes].blank?
-        # parse data as csv
-        node[:attributes][:data] = Quandl::Data::Format.csv_to_array(node[:data])
-        # onwards
-        output << node
-      end
-      output
+    def process_node(node, &block)
+      node = parse_node(node)
+      # fail on errored node
+      return false if node == false
+      # convert node to dataset
+      dataset = convert_node_to_dataset(node)
+      # do whatever we need to do with the node
+      block.call( dataset ) unless dataset.nil?
+      # success
+      true
     end
-    def nodes_to_datasets(nodes)
-      datasets = []
-      nodes.each do |node|
-        dataset = node_to_dataset(node)
-        datasets << dataset if dataset
-      end
-      datasets
+    def parse_node(node)
+      # parse attrs as yaml
+      node[:attributes] = parse_yaml_attributes(node)
+      # we cant continue unless attributes are present
+      return false if node[:attributes].blank?
+      # parse data as csv
+      node[:data] = Quandl::Data::Format.csv_to_array(node[:data])
+      node
     end
+    protected
     def parse_yaml_attributes(node)
       YAML.load( node[:attributes] ).symbolize_keys!
@@ -83,19 +116,18 @@ class Quandl::Format::Dataset::Load
       nil
     end
-    def node_to_dataset(node)
-      Quandl::Format::Dataset.new( node[:attributes] )
+    def convert_node_to_dataset(node)
+      dataset = Quandl::Format::Dataset.new( node[:attributes] )
+      dataset.data = node[:data]
+      dataset
     rescue => err
       log_dataset_error(node, err)
-    end
-    def attribute_format
-      /^([a-z0-9_]+): (.+)/
+      nil
     end
     def log_yaml_parse_error(node, err)
       message = "Attribute parse error at line #{ node[:line] + err.line } column #{err.column}. #{err.problem} (#{err.class})\n"
-      message += "Did you forget to delimit the meta data section from the data section with a one or more dashes ('-')?\n" unless node[:attributes] =~ /^-/
+      message += "Did you forget to delimit the meta data section from the data section with a one or more dashes ('#{SYNTAX[:data]}')?\n" unless node[:attributes] =~ /^-/
       message += "--"
       Quandl::Logger.error(message)
     end
@@ -114,7 +146,6 @@ class Quandl::Format::Dataset::Load
       message += "#{$!} (#{err.class})\n"
       message += "--"
       Quandl::Logger.error(message)
-      nil
     end
   end

data/lib/quandl/format/dataset.rb CHANGED Viewed

@@ -15,15 +15,23 @@ class Dataset
   include Quandl::Format::Dataset::Client
   class << self
+    def each_line(interface, &block)
+      Load.each_line(interface, &block)
+    end
+    def each_in_file(path, &block)
+      Load.each_in_file(path, &block)
+    end
     def load(input)
       Load.string(input)
     end
     def load_from_file(path)
-      Load.file(path)
+      Load.string(File.read(path))
     end
     def dump(datasets)
       Dump.collection(datasets)
     end

data/lib/quandl/format/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Quandl
 module Format
-  VERSION = "0.1.7"
+  VERSION = "0.1.8"
 end
 end

data/spec/fixtures/data/unknown_attribute.qdf CHANGED Viewed

@@ -1,4 +1,4 @@
-code: "BLAKE_TEST_1"
+code: "UNKNOWN_ATTRIBUTE_CODE"
 name: "A new title"
 this_attribute_does_not_exist: "Why is this here?"
 description: "The description Date, Open, High"

data/spec/fixtures/data/valid.qdf CHANGED Viewed

@@ -10,7 +10,7 @@ Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
 code: "BLAKE_TEST_2"
 name: "A new title"
 description: "The description Date, Open, High"
--
+----
 Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
 2013-11-22,1252.0,454.95,448.2,450.0,450.0,1354405.0,6099.41
 2013-11-21,452.25,457.75,449.1,451.2,451.0,218881.0,992.94
@@ -21,4 +21,4 @@ description: "The description Date, Open, High"
 -
 Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
 2013-11-22,1252.0,454.95,448.2,450.0,450.0,1354405.0,6099.41
-2013-11-21,452.25,457.75,449.1,451.2,451.0,218881.0,992.94
+2013-11-21,452.25,457.75,449.1,451.2,451.0,218881.0,992.94

data/spec/lib/quandl/format/dataset/load/errors_spec.rb CHANGED Viewed

@@ -8,9 +8,10 @@ describe Quandl::Format::Dataset do
     { file: 'mismatched_columns', error: /Expected 4 but found 5/ },
     { file: 'mismatched_rows',    error: /Expected 3 but found 4/ },
     { file: 'invalid_yaml',       error: /could not find expected ':'/ },
-    { file: 'missing_dashes',     error: /Attribute parse error at line 6 column 1/ },
+    { file: 'missing_dashes',     error: /Attribute parse error at line 28 column 1/ },
     { file: 'missing_dashes',     error: /Did you forget to delimit the meta data section/ },
   ]
   # run each expectation
   expected_errors.each do |pair|
     it "#{pair[:file]}.qdf should error with #{pair[:error]}" do

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: quandl_format
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.1.8
 platform: ruby
 authors:
 - Blake Hilscher
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-12-11 00:00:00.000000000 Z
+date: 2013-12-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake