text-data-tools 1.0.6 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +14 -1
- data/README.rdoc.orig +19 -0
- data/Rakefile +2 -0
- data/VERSION +1 -1
- data/lib/text-data-tools.rb +63 -28
- data/test/test_text-data-tools.rb +12 -12
- data/text-data-tools.gemspec +3 -2
- metadata +4 -2
data/README.rdoc
CHANGED
@@ -1,6 +1,19 @@
|
|
1
1
|
= text-data-tools
|
2
2
|
|
3
|
-
|
3
|
+
This is a set of tools for extracting data from simple text files, where the data appears in regular formats, for example columns.
|
4
|
+
|
5
|
+
== Examples
|
6
|
+
|
7
|
+
These are taken from the test suite.
|
8
|
+
|
9
|
+
Named variables:
|
10
|
+
file = TextDataTools::Named::DataFile.new('test/test_dat_2.dat', ':')
|
11
|
+
alpha_power = file.get_variable_value('Alpha power').to_f
|
12
|
+
|
13
|
+
Columnar Data:
|
14
|
+
file = TextDataTools::Column::DataFile.new('test/test_dat.dat', true, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
15
|
+
array = file.get_2d_array(/i\+ temp/, /1.*time/)
|
16
|
+
|
4
17
|
|
5
18
|
== Contributing to text-data-tools
|
6
19
|
|
data/README.rdoc.orig
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
= text-data-tools
|
2
|
+
|
3
|
+
Description goes here.
|
4
|
+
|
5
|
+
== Contributing to text-data-tools
|
6
|
+
|
7
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
8
|
+
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
9
|
+
* Fork the project.
|
10
|
+
* Start a feature/bugfix branch.
|
11
|
+
* Commit and push until you are happy with your contribution.
|
12
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
13
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
14
|
+
|
15
|
+
== Copyright
|
16
|
+
|
17
|
+
Copyright (c) 2013 Edmund Highcock. See LICENSE.txt for
|
18
|
+
further details.
|
19
|
+
|
data/Rakefile
CHANGED
@@ -11,6 +11,7 @@ rescue Bundler::BundlerError => e
|
|
11
11
|
end
|
12
12
|
require 'rake'
|
13
13
|
|
14
|
+
|
14
15
|
require 'jeweler'
|
15
16
|
Jeweler::Tasks.new do |gem|
|
16
17
|
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
@@ -51,3 +52,4 @@ Rake::RDocTask.new do |rdoc|
|
|
51
52
|
rdoc.rdoc_files.include('README*')
|
52
53
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
53
54
|
end
|
55
|
+
Rake.application.options.trace = false
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.6
|
1
|
+
1.1.0
|
data/lib/text-data-tools.rb
CHANGED
@@ -1,6 +1,20 @@
|
|
1
1
|
require 'fileutils'
|
2
2
|
|
3
|
+
# This is a set of tools for extracting data from simple text files, where the data appears in regular formats, for example columns.
|
4
|
+
# For more information see the individual submodules.
|
3
5
|
module TextDataTools
|
6
|
+
|
7
|
+
class DataFileBase
|
8
|
+
|
9
|
+
def exists?
|
10
|
+
FileTest.exists?(@filename)
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
14
|
+
|
15
|
+
# Tools for extracting data from text files where the data appears in columns
|
16
|
+
# with or without headers for each column.
|
17
|
+
module Column
|
4
18
|
# Return a one-dimensional array containing data from the file filename,
|
5
19
|
# which may or may not have a line of column headers,
|
6
20
|
# in the column column_header, where column_header may be either a string
|
@@ -127,23 +141,6 @@ module TextDataTools
|
|
127
141
|
class NotFoundError < StandardError
|
128
142
|
end
|
129
143
|
|
130
|
-
# Extract a variable value from the given file where the variable is defined
|
131
|
-
# in this form:
|
132
|
-
# name sep value
|
133
|
-
# E.g.
|
134
|
-
# heat = 4.0
|
135
|
-
def self.get_variable_value(filename, name, sep='=')
|
136
|
-
value = nil
|
137
|
-
File.open(filename) do |file|
|
138
|
-
while line= file.gets
|
139
|
-
next unless line =~ Regexp.new("#{Regexp.escape(name)}\\s*#{Regexp.escape(sep)}\\s*(?<value>.*)")
|
140
|
-
value = $~[:value]
|
141
|
-
|
142
|
-
end
|
143
|
-
end
|
144
|
-
raise NotFoundError.new("Can't find #{name} in #{filename}") unless value
|
145
|
-
value
|
146
|
-
end
|
147
144
|
def self.column_index_from_headers(line, column_header, header_match)
|
148
145
|
headers = line.scan(header_match)
|
149
146
|
#p headers
|
@@ -154,13 +151,13 @@ module TextDataTools
|
|
154
151
|
column_header = index_array.index(index_array.compact[0])
|
155
152
|
end
|
156
153
|
|
157
|
-
# This is a simple class which can interface with the methods of TextDataTools
|
154
|
+
# This is a simple class which can interface with the methods of TextDataTools::Column
|
158
155
|
# to prevent the user having to specify the file name and other properties of the
|
159
156
|
# data file for every call. In a
|
160
157
|
# nutshell, create a new instance of this class giving it the filename, and any
|
161
158
|
# appropriate options,
|
162
159
|
# then call methods from TextDataTools omitting the appropriate arguments.
|
163
|
-
class
|
160
|
+
class DataFile < DataFileBase
|
164
161
|
def initialize(filename, has_header_line = false, match = /\S+/, header_match = /\S+/)
|
165
162
|
@filename = filename
|
166
163
|
@match = match
|
@@ -169,25 +166,22 @@ module TextDataTools
|
|
169
166
|
self
|
170
167
|
end
|
171
168
|
def get_1d_array(column_header)
|
172
|
-
TextDataTools.get_1d_array(@filename, @has_header_line, column_header, @match, @header_match)
|
169
|
+
TextDataTools::Column.get_1d_array(@filename, @has_header_line, column_header, @match, @header_match)
|
173
170
|
end
|
174
171
|
def get_1d_array_float(column_header)
|
175
|
-
TextDataTools.get_1d_array_float(@filename, @has_header_line, column_header, @match, @header_match)
|
172
|
+
TextDataTools::Column.get_1d_array_float(@filename, @has_header_line, column_header, @match, @header_match)
|
176
173
|
end
|
177
174
|
def get_1d_array_integer(column_header)
|
178
|
-
TextDataTools.get_1d_array_integer(@filename, @has_header_line, column_header, @match, @header_match)
|
175
|
+
TextDataTools::Column.get_1d_array_integer(@filename, @has_header_line, column_header, @match, @header_match)
|
179
176
|
end
|
180
177
|
def get_2d_array(column_header, index_header)
|
181
|
-
TextDataTools.get_2d_array(@filename, @has_header_line, column_header, index_header, @match, @header_match)
|
178
|
+
TextDataTools::Column.get_2d_array(@filename, @has_header_line, column_header, index_header, @match, @header_match)
|
182
179
|
end
|
183
180
|
def get_2d_array_float(column_header, index_header)
|
184
|
-
TextDataTools.get_2d_array(@filename, @has_header_line, column_header, index_header, @match, @header_match)
|
181
|
+
TextDataTools::Column.get_2d_array(@filename, @has_header_line, column_header, index_header, @match, @header_match)
|
185
182
|
end
|
186
183
|
def get_2d_array_integer(column_header, index_header)
|
187
|
-
TextDataTools.get_2d_array(@filename, @has_header_line, column_header, index_header, @match, @header_match)
|
188
|
-
end
|
189
|
-
def get_variable_value(name, sep)
|
190
|
-
TextDataTools.get_variable_value(@filename, name, sep)
|
184
|
+
TextDataTools::Column.get_2d_array(@filename, @has_header_line, column_header, index_header, @match, @header_match)
|
191
185
|
end
|
192
186
|
def exists?
|
193
187
|
FileTest.exists?(@filename)
|
@@ -200,4 +194,45 @@ module TextDataTools
|
|
200
194
|
#end
|
201
195
|
#end
|
202
196
|
end
|
197
|
+
end
|
198
|
+
|
199
|
+
# Tools for dealing with files where named variables are assigned in the form
|
200
|
+
# name sep base
|
201
|
+
# E.g.
|
202
|
+
# height = 4.0
|
203
|
+
module Named
|
204
|
+
# Extract a variable value from the given file where the variable is defined
|
205
|
+
# in this form:
|
206
|
+
# name sep value
|
207
|
+
# E.g.
|
208
|
+
# heat = 4.0
|
209
|
+
def self.get_variable_value(filename, name, sep='=')
|
210
|
+
value = nil
|
211
|
+
File.open(filename) do |file|
|
212
|
+
while line= file.gets
|
213
|
+
next unless line =~ Regexp.new("#{Regexp.escape(name)}\\s*#{Regexp.escape(sep)}\\s*(?<value>.*)")
|
214
|
+
value = $~[:value]
|
215
|
+
|
216
|
+
end
|
217
|
+
end
|
218
|
+
raise NotFoundError.new("Can't find #{name} in #{filename}") unless value
|
219
|
+
value
|
220
|
+
end
|
221
|
+
# This is a simple class which can interface with the methods of TextDataTools::Named
|
222
|
+
# to prevent the user having to specify the file name and other properties of the
|
223
|
+
# data file for every call. In a
|
224
|
+
# nutshell, create a new instance of this class giving it the filename, and any
|
225
|
+
# appropriate options,
|
226
|
+
# then call methods from TextDataTools omitting the appropriate arguments.
|
227
|
+
class DataFile < DataFileBase
|
228
|
+
def initialize(filename, sep = ':')
|
229
|
+
@filename = filename
|
230
|
+
@sep = sep
|
231
|
+
self
|
232
|
+
end
|
233
|
+
def get_variable_value(name)
|
234
|
+
TextDataTools::Named.get_variable_value(@filename, name, @sep)
|
235
|
+
end
|
236
|
+
end
|
237
|
+
end
|
203
238
|
end # module TextDataTools
|
@@ -2,21 +2,21 @@ require 'helper'
|
|
2
2
|
|
3
3
|
class TestTextDataTools < Test::Unit::TestCase
|
4
4
|
def test_1d
|
5
|
-
assert_raise(ArgumentError){TextDataTools.get_1d_array('test/test_dat.dat', true, 2.2)}
|
6
|
-
assert_raise(ArgumentError){TextDataTools.get_1d_array('test/test_dat.dat',true, /ii\+ temp/, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)}
|
7
|
-
array = TextDataTools.get_1d_array('test/test_dat.dat', true, /i\+ temp/, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
5
|
+
assert_raise(ArgumentError){TextDataTools::Column.get_1d_array('test/test_dat.dat', true, 2.2)}
|
6
|
+
assert_raise(ArgumentError){TextDataTools::Column.get_1d_array('test/test_dat.dat',true, /ii\+ temp/, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)}
|
7
|
+
array = TextDataTools::Column.get_1d_array('test/test_dat.dat', true, /i\+ temp/, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
8
8
|
#puts array
|
9
9
|
assert_equal(array.size, 18)
|
10
10
|
assert_equal(array[9].to_f, 0.9753E+09)
|
11
|
-
array = TextDataTools.get_1d_array_float('test/test_dat.dat', true, /i\+ temp/, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
11
|
+
array = TextDataTools::Column.get_1d_array_float('test/test_dat.dat', true, /i\+ temp/, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
12
12
|
assert_equal(array[9], 0.9753E+09)
|
13
13
|
end
|
14
14
|
def test_2d
|
15
|
-
array = TextDataTools.get_2d_array('test/test_dat.dat', true, /i\+ temp/, 0, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
15
|
+
array = TextDataTools::Column.get_2d_array('test/test_dat.dat', true, /i\+ temp/, 0, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
16
16
|
assert_equal(array.size, 2)
|
17
|
-
array = TextDataTools.get_2d_array('test/test_dat.dat', true, /i\+ temp/, 1, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
17
|
+
array = TextDataTools::Column.get_2d_array('test/test_dat.dat', true, /i\+ temp/, 1, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
18
18
|
assert_equal(array.size, 18)
|
19
|
-
array = TextDataTools.get_2d_array_float('test/test_dat.dat', true, /i\+ temp/, 0, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
19
|
+
array = TextDataTools::Column.get_2d_array_float('test/test_dat.dat', true, /i\+ temp/, 0, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
20
20
|
assert_equal(array[0].size, 9)
|
21
21
|
assert_equal(array[1][0], 0.9753E+09)
|
22
22
|
|
@@ -24,15 +24,15 @@ class TestTextDataTools < Test::Unit::TestCase
|
|
24
24
|
|
25
25
|
end
|
26
26
|
def test_get_variable
|
27
|
-
variable = TextDataTools.get_variable_value('test/test_dat_2.dat', 'Q', ':')
|
27
|
+
variable = TextDataTools::Named.get_variable_value('test/test_dat_2.dat', 'Q', ':')
|
28
28
|
assert_equal(variable.to_f, 11.989644168449118)
|
29
|
-
variable = TextDataTools.get_variable_value('test/test_dat_2.dat', 'Fusion power', ':')
|
29
|
+
variable = TextDataTools::Named.get_variable_value('test/test_dat_2.dat', 'Fusion power', ':')
|
30
30
|
assert_equal(variable.to_f, 484.34196189744871)
|
31
31
|
end
|
32
32
|
def test_texdatafile_class
|
33
|
-
file = TextDataTools::
|
34
|
-
assert_equal(file.get_variable_value('Alpha power'
|
35
|
-
file = TextDataTools::
|
33
|
+
file = TextDataTools::Named::DataFile.new('test/test_dat_2.dat', ':')
|
34
|
+
assert_equal(file.get_variable_value('Alpha power').to_f, 116.90499891894469 )
|
35
|
+
file = TextDataTools::Column::DataFile.new('test/test_dat.dat', true, /\S+/, /(?:\#\s+)?\d:.*?(?=\d:)/)
|
36
36
|
array = file.get_2d_array(/i\+ temp/, /1.*time/)
|
37
37
|
assert_equal(array.size, 2)
|
38
38
|
assert(file.exists?)
|
data/text-data-tools.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "text-data-tools"
|
8
|
-
s.version = "1.0.6"
|
8
|
+
s.version = "1.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Edmund Highcock"]
|
@@ -14,7 +14,8 @@ Gem::Specification.new do |s|
|
|
14
14
|
s.email = "edmundhighcock@sourceforge.net"
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE.txt",
|
17
|
-
"README.rdoc"
|
17
|
+
"README.rdoc",
|
18
|
+
"README.rdoc.orig"
|
18
19
|
]
|
19
20
|
s.files = [
|
20
21
|
".document",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text-data-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.6
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -83,6 +83,7 @@ extensions: []
|
|
83
83
|
extra_rdoc_files:
|
84
84
|
- LICENSE.txt
|
85
85
|
- README.rdoc
|
86
|
+
- README.rdoc.orig
|
86
87
|
files:
|
87
88
|
- .document
|
88
89
|
- Gemfile
|
@@ -96,6 +97,7 @@ files:
|
|
96
97
|
- test/test_dat_2.dat
|
97
98
|
- test/test_text-data-tools.rb
|
98
99
|
- text-data-tools.gemspec
|
100
|
+
- README.rdoc.orig
|
99
101
|
homepage: http://github.com/edmundhighcock/text-data-tools
|
100
102
|
licenses:
|
101
103
|
- GPLv3
|
@@ -111,7 +113,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
111
113
|
version: '0'
|
112
114
|
segments:
|
113
115
|
- 0
|
114
|
-
hash:
|
116
|
+
hash: 534969954448089810
|
115
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
118
|
none: false
|
117
119
|
requirements:
|