quandl_format 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5f454dd17f3f3d71afa6ecda800df054fcc52f27
4
- data.tar.gz: 04b4dab5f31e57b2f7c6e6af810e952fe25524ae
3
+ metadata.gz: e3c22439813d6d95d978d4f5f1208445e4d8f25d
4
+ data.tar.gz: 89860b891f3076468f853d3151f50a6c48b5a8ca
5
5
  SHA512:
6
- metadata.gz: 75af0ccda893f85cc46719d1fadf2ddfa65b6abef92075ecddcc098ba4f7c1ed7ea1ba5b031223f523921293639b5d313ace67ec55a28bf0c132bad20f11d78e
7
- data.tar.gz: cb56897d0c1ec4b625708a85917ecc88efba46d6576d4576812679822b466d436736542944890303d44741f1f04f502da709adce8ecbcff45b0a9509326b8a2f
6
+ metadata.gz: 425e8f296e868a0f2978ed42aacb0848974c382f340598988fafaabd101e1c794645566becf871060e331a552db771c211646711ea6cc3d6a3f680ce97456846
7
+ data.tar.gz: eed88eb2e36fa32feab9e70f0e52d929af540e127cea59610433319f6e4ede46445bc53dbf4b4c90995caff9e149e7a2377a8b5c3352f1c4a9bd0b67a7f2ce8d
data/UPGRADE.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.8
2
+
3
+ * add Quandl::Format::Dataset.each_line to upload datasets line by line as they become available from whichever interface. Allows the Ruby GC to operate and avoids having 20k datasets in memory ...
4
+
5
+
1
6
  ## 0.1.7
2
7
 
3
8
  * Accept tabular data
@@ -1,80 +1,113 @@
1
1
  class Quandl::Format::Dataset::Load
2
2
 
3
- SECTION_DELIMITER = '-'
3
+ SYNTAX = {
4
+ comment: '#',
5
+ data: '-',
6
+ attribute: /^([a-z0-9_]+): (.+)/,
7
+ }
4
8
 
5
9
  class << self
6
10
 
11
+ def each_in_file(path, &block)
12
+ each_line( File.open(path, "r"), &block )
13
+ end
14
+
15
+ def each_line(interface, &block)
16
+ node = new_node
17
+ # for each file line
18
+ interface.each_line do |line|
19
+ # process line
20
+ node = process_line(line, node, &block)
21
+ end
22
+ process_tail(node, &block)
23
+ end
24
+
7
25
  def file(path)
8
- string(File.read(path).strip)
26
+ string( File.read(path) )
9
27
  end
10
-
28
+
11
29
  def string(input)
12
- nodes = parse_string(input)
13
- nodes = parse_yaml_and_csv(nodes)
14
- nodes = nodes_to_datasets(nodes)
15
- nodes
30
+ # prepare to collect all datasets
31
+ datasets = []
32
+ # initialize blank node
33
+ node = new_node
34
+ # for each line
35
+ input.each_line do |line|
36
+ # process each line when encountering dataset append it to datasets
37
+ node = process_line( line, node ){|d| datasets << d }
38
+ end
39
+ # signify end
40
+ process_tail(node){|d| datasets << d }
41
+ # return datasets
42
+ datasets
16
43
  end
17
44
 
18
- protected
45
+ def new_node(line=0)
46
+ { line: line, section: :attributes, data: '', attributes: '', data_line: 0 }
47
+ end
19
48
 
20
- def parse_string(input)
21
- nodes = []
22
- section_type = :data
23
- line_index = 0
24
- input.each_line do |rline|
25
- # track current line index
26
- line_index += 1
27
- # strip whitespace
28
- line = rline.strip.rstrip
29
- # ignore comments and blank lines
30
- next if line[0] == '#' || line.blank?
31
-
32
- # are we looking at an attribute?
33
- if line =~ attribute_format
34
- # if we are leaving the data section
35
- # then this is the start of a new node
36
- nodes << { attributes: '', data: '', line: line_index } if section_type == :data
37
- # update the section to attributes
38
- section_type = :attributes
39
-
40
- # have we reached the end of the attributes?
41
- elsif line == '-'
42
- # update the section to data
43
- nodes[-1][:data_line] = line_index + 1
44
- section_type = :data
45
- # skip to the next line
46
- next
49
+ def process_tail(node, &block)
50
+ # signify end
51
+ process_line('-', node, &block)
52
+ process_line('tail: end', node, &block)
53
+ end
54
+
55
+ def process_line(rline, node, &block)
56
+ # increment node line
57
+ node[:line] += 1
58
+ # strip whitespace
59
+ line = rline.strip.rstrip
60
+ # skip comments and blank lines
61
+ return node if line[0] == SYNTAX[:comment] || line.blank?
62
+ # looking at an attribute?
63
+ if line =~ SYNTAX[:attribute]
64
+ # exiting data section?
65
+ if node[:section] == :data
66
+ # we've reached the end of a node
67
+ # send it to the server
68
+ process_node(node, &block)
69
+ # start a new node while retaining current line line
70
+ node = new_node( node[:line] )
47
71
  end
48
- # add the line to it's section in the current node.
49
- # YAML must include whitespace
50
- nodes[-1][section_type] += (section_type == :data) ? "#{line}\n" : rline
72
+ # update the node's section
73
+ node[:section] = :attributes
74
+ # entering the data section?
75
+ elsif line[0] == SYNTAX[:data]
76
+ # update the node
77
+ node[:data_line] = node[:line] + 1
78
+ node[:section] = :data
79
+ # skip to the next line
80
+ return node
51
81
  end
52
- nodes
82
+ # append the line to the requested section
83
+ node[ node[:section] ] += ( node[:section] == :data ) ? "#{line}\n" : rline
84
+ # return the updated node
85
+ node
53
86
  end
54
87
 
55
- def parse_yaml_and_csv(nodes)
56
- output = []
57
- nodes.each do |node|
58
- # parse attrs as yaml
59
- node[:attributes] = parse_yaml_attributes(node)
60
- # we cant continue unless attributes are present
61
- next if node[:attributes].blank?
62
- # parse data as csv
63
- node[:attributes][:data] = Quandl::Data::Format.csv_to_array(node[:data])
64
- # onwards
65
- output << node
66
- end
67
- output
88
+ def process_node(node, &block)
89
+ node = parse_node(node)
90
+ # fail on errored node
91
+ return false if node == false
92
+ # convert node to dataset
93
+ dataset = convert_node_to_dataset(node)
94
+ # do whatever we need to do with the node
95
+ block.call( dataset ) unless dataset.nil?
96
+ # success
97
+ true
68
98
  end
69
99
 
70
- def nodes_to_datasets(nodes)
71
- datasets = []
72
- nodes.each do |node|
73
- dataset = node_to_dataset(node)
74
- datasets << dataset if dataset
75
- end
76
- datasets
100
+ def parse_node(node)
101
+ # parse attrs as yaml
102
+ node[:attributes] = parse_yaml_attributes(node)
103
+ # we cant continue unless attributes are present
104
+ return false if node[:attributes].blank?
105
+ # parse data as csv
106
+ node[:data] = Quandl::Data::Format.csv_to_array(node[:data])
107
+ node
77
108
  end
109
+
110
+ protected
78
111
 
79
112
  def parse_yaml_attributes(node)
80
113
  YAML.load( node[:attributes] ).symbolize_keys!
@@ -83,19 +116,18 @@ class Quandl::Format::Dataset::Load
83
116
  nil
84
117
  end
85
118
 
86
- def node_to_dataset(node)
87
- Quandl::Format::Dataset.new( node[:attributes] )
119
+ def convert_node_to_dataset(node)
120
+ dataset = Quandl::Format::Dataset.new( node[:attributes] )
121
+ dataset.data = node[:data]
122
+ dataset
88
123
  rescue => err
89
124
  log_dataset_error(node, err)
90
- end
91
-
92
- def attribute_format
93
- /^([a-z0-9_]+): (.+)/
125
+ nil
94
126
  end
95
127
 
96
128
  def log_yaml_parse_error(node, err)
97
129
  message = "Attribute parse error at line #{ node[:line] + err.line } column #{err.column}. #{err.problem} (#{err.class})\n"
98
- message += "Did you forget to delimit the meta data section from the data section with a one or more dashes ('-')?\n" unless node[:attributes] =~ /^-/
130
+ message += "Did you forget to delimit the meta data section from the data section with a one or more dashes ('#{SYNTAX[:data]}')?\n" unless node[:attributes] =~ /^-/
99
131
  message += "--"
100
132
  Quandl::Logger.error(message)
101
133
  end
@@ -114,7 +146,6 @@ class Quandl::Format::Dataset::Load
114
146
  message += "#{$!} (#{err.class})\n"
115
147
  message += "--"
116
148
  Quandl::Logger.error(message)
117
- nil
118
149
  end
119
150
 
120
151
  end
@@ -15,15 +15,23 @@ class Dataset
15
15
  include Quandl::Format::Dataset::Client
16
16
 
17
17
  class << self
18
+
19
+ def each_line(interface, &block)
20
+ Load.each_line(interface, &block)
21
+ end
22
+
23
+ def each_in_file(path, &block)
24
+ Load.each_in_file(path, &block)
25
+ end
18
26
 
19
27
  def load(input)
20
28
  Load.string(input)
21
29
  end
22
30
 
23
31
  def load_from_file(path)
24
- Load.file(path)
32
+ Load.string(File.read(path))
25
33
  end
26
-
34
+
27
35
  def dump(datasets)
28
36
  Dump.collection(datasets)
29
37
  end
@@ -1,5 +1,5 @@
1
1
  module Quandl
2
2
  module Format
3
- VERSION = "0.1.7"
3
+ VERSION = "0.1.8"
4
4
  end
5
5
  end
@@ -1,4 +1,4 @@
1
- code: "BLAKE_TEST_1"
1
+ code: "UNKNOWN_ATTRIBUTE_CODE"
2
2
  name: "A new title"
3
3
  this_attribute_does_not_exist: "Why is this here?"
4
4
  description: "The description Date, Open, High"
@@ -10,7 +10,7 @@ Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
10
10
  code: "BLAKE_TEST_2"
11
11
  name: "A new title"
12
12
  description: "The description Date, Open, High"
13
- -
13
+ ----
14
14
  Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
15
15
  2013-11-22,1252.0,454.95,448.2,450.0,450.0,1354405.0,6099.41
16
16
  2013-11-21,452.25,457.75,449.1,451.2,451.0,218881.0,992.94
@@ -21,4 +21,4 @@ description: "The description Date, Open, High"
21
21
  -
22
22
  Date,Open,High,Low,Last,Close,Total Trade Quantity,Turnover (Lacs)
23
23
  2013-11-22,1252.0,454.95,448.2,450.0,450.0,1354405.0,6099.41
24
- 2013-11-21,452.25,457.75,449.1,451.2,451.0,218881.0,992.94
24
+ 2013-11-21,452.25,457.75,449.1,451.2,451.0,218881.0,992.94
@@ -8,9 +8,10 @@ describe Quandl::Format::Dataset do
8
8
  { file: 'mismatched_columns', error: /Expected 4 but found 5/ },
9
9
  { file: 'mismatched_rows', error: /Expected 3 but found 4/ },
10
10
  { file: 'invalid_yaml', error: /could not find expected ':'/ },
11
- { file: 'missing_dashes', error: /Attribute parse error at line 6 column 1/ },
11
+ { file: 'missing_dashes', error: /Attribute parse error at line 28 column 1/ },
12
12
  { file: 'missing_dashes', error: /Did you forget to delimit the meta data section/ },
13
13
  ]
14
+
14
15
  # run each expectation
15
16
  expected_errors.each do |pair|
16
17
  it "#{pair[:file]}.qdf should error with #{pair[:error]}" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: quandl_format
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Blake Hilscher
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-11 00:00:00.000000000 Z
11
+ date: 2013-12-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake