quandl_format 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5f454dd17f3f3d71afa6ecda800df054fcc52f27
4
- data.tar.gz: 04b4dab5f31e57b2f7c6e6af810e952fe25524ae
3
+ metadata.gz: e3c22439813d6d95d978d4f5f1208445e4d8f25d
4
+ data.tar.gz: 89860b891f3076468f853d3151f50a6c48b5a8ca
5
5
  SHA512:
6
- metadata.gz: 75af0ccda893f85cc46719d1fadf2ddfa65b6abef92075ecddcc098ba4f7c1ed7ea1ba5b031223f523921293639b5d313ace67ec55a28bf0c132bad20f11d78e
7
- data.tar.gz: cb56897d0c1ec4b625708a85917ecc88efba46d6576d4576812679822b466d436736542944890303d44741f1f04f502da709adce8ecbcff45b0a9509326b8a2f
6
+ metadata.gz: 425e8f296e868a0f2978ed42aacb0848974c382f340598988fafaabd101e1c794645566becf871060e331a552db771c211646711ea6cc3d6a3f680ce97456846
7
+ data.tar.gz: eed88eb2e36fa32feab9e70f0e52d929af540e127cea59610433319f6e4ede46445bc53dbf4b4c90995caff9e149e7a2377a8b5c3352f1c4a9bd0b67a7f2ce8d
data/UPGRADE.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.8
2
+
3
+ * add Quandl::Format::Dataset.each_line to upload datasets line by line as they become available from whichever interface. Allows Ruby GC to operate and avoids having 20k datasets in memory ...
4
+
5
+
1
6
  ## 0.1.7
2
7
 
3
8
  * Accept tabular data
@@ -1,80 +1,113 @@
1
1
  class Quandl::Format::Dataset::Load
2
2
 
3
- SECTION_DELIMITER = '-'
3
+ SYNTAX = {
4
+ comment: '#',
5
+ data: '-',
6
+ attribute: /^([a-z0-9_]+): (.+)/,
7
+ }
4
8
 
5
9
  class << self
6
10
 
11
+ def each_in_file(path, &block)
12
+ each_line( File.open(path, "r"), &block )
13
+ end
14
+
15
+ def each_line(interface, &block)
16
+ node = new_node
17
+ # for each file line
18
+ interface.each_line do |line|
19
+ # process line
20
+ node = process_line(line, node, &block)
21
+ end
22
+ process_tail(node, &block)
23
+ end
24
+
7
25
  def file(path)
8
- string(File.read(path).strip)
26
+ string( File.read(path) )
9
27
  end
10
-
28
+
11
29
  def string(input)
12
- nodes = parse_string(input)
13
- nodes = parse_yaml_and_csv(nodes)
14
- nodes = nodes_to_datasets(nodes)
15
- nodes
30
+ # prepare to collect all datasets
31
+ datasets = []
32
+ # initialize blank node
33
+ node = new_node
34
+ # for each line
35
+ input.each_line do |line|
36
+ # process each line when encountering dataset append it to datasets
37
+ node = process_line( line, node ){|d| datasets << d }
38
+ end
39
+ # signify end
40
+ process_tail(node){|d| datasets << d }
41
+ # return datasets
42
+ datasets
16
43
  end
17
44
 
18
- protected
45
+ def new_node(line=0)
46
+ { line: line, section: :attributes, data: '', attributes: '', data_line: 0 }
47
+ end
19
48
 
20
- def parse_string(input)
21
- nodes = []
22
- section_type = :data
23
- line_index = 0
24
- input.each_line do |rline|
25
- # track current line index
26
- line_index += 1
27
- # strip whitespace
28
- line = rline.strip.rstrip
29
- # ignore comments and blank lines
30
- next if line[0] == '#' || line.blank?
31
-
32
- # are we looking at an attribute?
33
- if line =~ attribute_format
34
- # if we are leaving the data section
35
- # then this is the start of a new node
36
- nodes << { attributes: '', data: '', line: line_index } if section_type == :data
37
- # update the section to attributes
38
- section_type = :attributes
39
-
40
- # have we reached the end of the attributes?
41
- elsif line == '-'
42
- # update the section to data
43
- nodes[-1][:data_line] = line_index + 1
44
- section_type = :data
45
- # skip to the next line
46
- next
49
+ def process_tail(node, &block)
50
+ # signify end
51
+ process_line('-', node, &block)
52
+ process_line('tail: end', node, &block)
53
+ end
54
+
55
+ def process_line(rline, node, &block)
56
+ # increment node line
57
+ node[:line] += 1
58
+ # strip whitespace
59
+ line = rline.strip.rstrip
60
+ # skip comments and blank lines
61
+ return node if line[0] == SYNTAX[:comment] || line.blank?
62
+ # looking at an attribute?
63
+ if line =~ SYNTAX[:attribute]
64
+ # exiting data section?
65
+ if node[:section] == :data
66
+ # we've reached the end of a node
67
+ # send it to the server
68
+ process_node(node, &block)
69
+ # start a new node while retaining current line line
70
+ node = new_node( node[:line] )
47
71
  end
48
- # add the line to it's section in the current node.
49
- # YAML must include whitespace
50
- nodes[-1][section_type] += (section_type == :data) ? "#{line}\n" : rline
72
+ # update the node's section
73
+ node[:section] = :attributes
74
+ # entering the data section?
75
+ elsif line[0] == SYNTAX[:data]
76
+ # update the node
77
+ node[:data_line] = node[:line] + 1
78
+ node[:section] = :data
79
+ # skip to the next line
80
+ return node
51
81
  end
52
- nodes
82
+ # append the line to the requested section
83
+ node[ node[:section] ] += ( node[:section] == :data ) ? "#{line}\n" : rline
84
+ # return the updated node
85
+ node
53
86
  end
54
87
 
55
- def parse_yaml_and_csv(nodes)
56
- output = []
57
- nodes.each do |node|
58
- # parse attrs as yaml
59
- node[:attributes] = parse_yaml_attributes(node)
60
- # we cant continue unless attributes are present
61
- next if node[:attributes].blank?
62
- # parse data as csv
63
- node[:attributes][:data] = Quandl::Data::Format.csv_to_array(node[:data])
64
- # onwards
65
- output << node
66
- end
67
- output
88
+ def process_node(node, &block)
89
+ node = parse_node(node)
90
+ # fail on errored node
91
+ return false if node == false
92
+ # convert node to dataset
93
+ dataset = convert_node_to_dataset(node)
94
+ # do whatever we need to do with the node
95
+ block.call( dataset ) unless dataset.nil?
96
+ # success
97
+ true
68
98
  end
69
99
 
70
- def nodes_to_datasets(nodes)
71
- datasets = []
72
- nodes.each do |node|
73
- dataset = node_to_dataset(node)
74
- datasets << dataset if dataset
75
- end
76
- datasets
100
+ def parse_node(node)
101
+ # parse attrs as yaml
102
+ node[:attributes] = parse_yaml_attributes(node)
103
+ # we cant continue unless attributes are present
104
+ return false if node[:attributes].blank?
105
+ # parse data as csv
106
+ node[:data] = Quandl::Data::Format.csv_to_array(node[:data])
107
+ node
77
108
  end
109
+
110
+ protected
78
111
 
79
112
  def parse_yaml_attributes(node)
80
113
  YAML.load( node[:attributes] ).symbolize_keys!
@@ -83,19 +116,18 @@ class Quandl::Format::Dataset::Load
83
116
  nil
84
117
  end
85
118
 
86
- def node_to_dataset(node)
87
- Quandl::Format::Dataset.new( node[:attributes] )
119
+ def convert_node_to_dataset(node)
120
+ dataset = Quandl::Format::Dataset.new( node[:attributes] )
121
+ dataset.data = node[:data]
122
+ dataset
88
123
  rescue => err
89
124
  log_dataset_error(node, err)
90
- end
91
-
92
- def attribute_format
93
- /^([a-z0-9_]+): (.+)/
125
+ nil
94
126
  end
95
127
 
96
128
  def log_yaml_parse_error(node, err)
97
129
  message = "Attribute parse error at line #{ node[:line] + err.line } column #{err.column}. #{err.problem} (#{err.class})\n"
98
- message += "Did you forget to delimit the meta data section from the data section with a one or more dashes ('-')?\n" unless node[:attributes] =~ /^-/
130
+ message += "Did you forget to delimit the meta data section from the data section with a one or more dashes ('#{SYNTAX[:data]}')?\n" unless node[:attributes] =~ /^-/
99
131
  message += "--"
100
132
  Quandl::Logger.error(message)
101
133
  end
@@ -114,7 +146,6 @@ class Quandl::Format::Dataset::Load
114
146
  message += "#{$!} (#{err.class})\n"
115
147
  message += "--"
116
148
  Quandl::Logger.error(message)
117
- nil
118
149
  end
119
150
 
120
151
  end
@@ -15,15 +15,23 @@ class Dataset
15
15
  include Quandl::Format::Dataset::Client
16
16
 
17
17
  class << self
18
+
19
+ def each_line(interface, &block)
20
+ Load.each_line(interface, &block)
21
+ end
22
+
23
+ def each_in_file(path, &block)
24
+ Load.each_in_file(path, &block)
25
+ end
18
26
 
19
27
  def load(input)
20
28
  Load.string(input)
21
29
  end
22
30
 
23
31
  def load_from_file(path)
24
- Load.file(path)
32
+ Load.string(File.read(path))
25
33
  end
26
-
34
+
27
35
  def dump(datasets)
28
36
  Dump.collection(datasets)
29
37
  end
@@ -1,5 +1,5 @@
1
1
  module Quandl
2
2
  module Format
3
- VERSION = "0.1.7"
3
+ VERSION = "0.1.8"
4
4
  end
5
5
  end
@@ -1,4 +1,4 @@
1
- code: "BLAKE_TEST_1"
1
+ code: "UNKNOWN_ATTRIBUTE_CODE"
2
2
  name: "A new title"
3
3
  this_attribute_does_not_exist: "Why is this here?"
4
4
  description: "The description Date, Open, High"
@@ -10,7 +10,7 @@ Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
10
10
  code: "BLAKE_TEST_2"
11
11
  name: "A new title"
12
12
  description: "The description Date, Open, High"
13
- -
13
+ ----
14
14
  Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
15
15
  2013-11-22,1252.0,454.95,448.2,450.0,450.0,1354405.0,6099.41
16
16
  2013-11-21,452.25,457.75,449.1,451.2,451.0,218881.0,992.94
@@ -21,4 +21,4 @@ description: "The description Date, Open, High"
21
21
  -
22
22
  Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
23
23
  2013-11-22,1252.0,454.95,448.2,450.0,450.0,1354405.0,6099.41
24
- 2013-11-21,452.25,457.75,449.1,451.2,451.0,218881.0,992.94
24
+ 2013-11-21,452.25,457.75,449.1,451.2,451.0,218881.0,992.94
@@ -8,9 +8,10 @@ describe Quandl::Format::Dataset do
8
8
  { file: 'mismatched_columns', error: /Expected 4 but found 5/ },
9
9
  { file: 'mismatched_rows', error: /Expected 3 but found 4/ },
10
10
  { file: 'invalid_yaml', error: /could not find expected ':'/ },
11
- { file: 'missing_dashes', error: /Attribute parse error at line 6 column 1/ },
11
+ { file: 'missing_dashes', error: /Attribute parse error at line 28 column 1/ },
12
12
  { file: 'missing_dashes', error: /Did you forget to delimit the meta data section/ },
13
13
  ]
14
+
14
15
  # run each expectation
15
16
  expected_errors.each do |pair|
16
17
  it "#{pair[:file]}.qdf should error with #{pair[:error]}" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: quandl_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Blake Hilscher
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-11 00:00:00.000000000 Z
11
+ date: 2013-12-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake