parseinput 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +41 -0
- data/lib/parse/input.rb +140 -0
- data/setup.rb +1360 -0
- data/test/tc_reading.rb +70 -0
- data/test/tc_state.rb +142 -0
- data/test/ts_all.rb +11 -0
- metadata +47 -0
data/test/tc_reading.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/local/bin/ruby -w
|
2
|
+
|
3
|
+
# tc_reading.rb
|
4
|
+
#
|
5
|
+
# Created by James Edward Gray II on 2005-08-14.
|
6
|
+
# Copyright 2005 Gray Productions. All rights reserved.
|
7
|
+
|
8
|
+
require "test/unit"
|
9
|
+
|
10
|
+
require "parse/input"
|
11
|
+
|
12
|
+
# Tests for the reading side of the Parse::Input DSL: plain line reads,
# paragraph-mode reads, pattern-restricted reads, and retrieving values that
# were stored in instance variables while parsing.
#
# The fixture files (five_lines.txt, five_paragraphs.txt) live next to this
# file and are not shown here.
#
# Several tests assert from inside a +read+ callback.  Each of those keeps a
# queue of expected values and, once parsing has finished, also asserts that
# the queue is empty; otherwise a callback that never fired (or fired too few
# times) would let the test pass without checking anything.
class TestReading < Test::Unit::TestCase
  # A bare +read+ should yield every line, in File.readlines order.
  def test_line_by_line_read
    path  = File.join(File.dirname(__FILE__), "five_lines.txt")
    lines = File.readlines(path)
    # Assertions inside the DSL block go through this local rather than an
    # implicit self -- presumably the block is evaluated against the parser
    # object (instance variables set inside it surface on the result below).
    test  = self

    Parse::Input.new(path) do
      read { |line| test.assert_equal(lines.shift, line) }
    end

    # NOTE(review): assumes Parse::Input.new parses before returning, as
    # input() visibly does in test_data_saving_and_retrieving -- confirm.
    assert_equal([], lines)
  end

  # Same check as above, through the +input+ shortcut.
  def test_simple_interface
    path  = File.join(File.dirname(__FILE__), "five_lines.txt")
    lines = File.readlines(path)
    test  = self

    input(path) do
      read { |line| test.assert_equal(lines.shift, line) }
    end

    assert_equal([], lines)
  end

  # Passing "" as the second argument should chunk the input the same way
  # File.readlines(path, "") does.
  def test_paragraph_read
    path       = File.join(File.dirname(__FILE__), "five_paragraphs.txt")
    paragraphs = File.readlines(path, "")
    test       = self

    input(path, "") do
      read { |paragraph| test.assert_equal(paragraphs.shift, paragraph) }
    end

    assert_equal([], paragraphs)
  end

  # A +read+ given a pattern should fire only for matching records and yield
  # the capture, while @read holds the full record being processed.
  def test_restricted_reading
    path    = File.join(File.dirname(__FILE__), "five_lines.txt")
    numbers = %w[two three]
    test    = self

    input(path) do
      read(/ (t\w+)\./) do |number|
        test.assert_equal("This is line #{numbers.first}.\n", @read)
        test.assert_equal(numbers.shift, number)
      end
    end

    # Both expected captures must have been consumed.
    assert_equal([], numbers)
  end

  # Instance variables assigned inside the block should be retrievable from
  # the returned object, both by symbol index and by reader-style call.
  def test_data_saving_and_retrieving
    path = File.join(File.dirname(__FILE__), "five_lines.txt")

    data = input(path) do
      read(/ (o\w+)\./) { |number| @number = number }
      read { |line| (@lines ||= []) << line }
    end

    assert_equal("one", data[:number])
    assert_equal("one", data.number)
    assert_equal(path, data.path)
    assert_equal(5, data.lines.size)
    assert_equal(File.readlines(path), data.lines)
  end
end
|
data/test/tc_state.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
#!/usr/local/bin/ruby -w
|
2
|
+
|
3
|
+
# tc_state.rb
|
4
|
+
#
|
5
|
+
# Created by James Edward Gray II on 2005-08-14.
|
6
|
+
# Copyright 2005 Gray Productions. All rights reserved.
|
7
|
+
|
8
|
+
require "test/unit"
|
9
|
+
|
10
|
+
require "parse/input"
|
11
|
+
|
12
|
+
# Tests for the state-handling side of the Parse::Input DSL: skipping single
# records and ranges, searching inside skipped records, stopping early, and
# the pre/post hooks, plus two larger scenarios against report-style fixtures.
#
# The fixture files (five_lines.txt, ross_report.txt, car_ads.txt) live next
# to this file and are not shown here.
#
# Tests that assert from inside a +read+ callback against a queue of expected
# lines also assert, after parsing, that the queue is empty; otherwise a
# callback that fired too few times would go unnoticed.
class TestState < Test::Unit::TestCase
  # +skip+ should drop matching records; the expected list omits index 3.
  def test_single_skips
    path  = File.join(File.dirname(__FILE__), "five_lines.txt")
    lines = File.readlines(path).values_at(0..2, 4)
    # Assertions inside the DSL block go through this local rather than an
    # implicit self -- presumably the block is evaluated against the parser
    # object (instance variables set inside it surface on the result).
    test  = self

    input(path) do
      skip(?u)

      read { |line| test.assert_equal(lines.shift, line) }
    end

    assert_equal([], lines)
  end

  # A start/stop skipping pair should drop a range of records; the expected
  # list omits indexes 1 and 2.
  def test_skipping_range
    path  = File.join(File.dirname(__FILE__), "five_lines.txt")
    lines = File.readlines(path).values_at(0, 3..4)
    test  = self

    input(path) do
      start_skipping_at("two")
      stop_skipping_at("three")

      read { |line| test.assert_equal(lines.shift, line) }
    end

    assert_equal([], lines)
  end

  # Starting in the :skip state, +find_in_skipped+ should still see skipped
  # records, while +read+ only collects what follows the stop marker.
  def test_searching_skipped
    path = File.join(File.dirname(__FILE__), "five_lines.txt")

    data = input(path) do
      @state = :skip
      stop_skipping_at("four")
      find_in_skipped(/ou/) { |line| @found_in_skip = line }

      read { |line| (@lines ||= []) << line }
    end

    assert_equal("This is line four.\n", data.found_in_skip)
    assert_equal(["This is line five.\n"], data.lines)
  end

  # Two ways to stop: +stop_at+ ends parsing before the matching record is
  # read, whereas setting @state to :stop inside +read+ ends it after.
  def test_stop
    path = File.join(File.dirname(__FILE__), "five_lines.txt")

    data = input(path) do
      stop_at("three")

      read { |line| @last_line = line }
    end

    assert_equal("This is line two.\n", data.last_line)

    data = input(path) do
      read do |line|
        @last_line = line

        @state = :stop if @read.index("three")
      end
    end

    assert_equal("This is line three.\n", data.last_line)
  end

  # The +pre+ hook bumps @pre and resets @post; the +post+ hook bumps @post.
  # The expected totals (5 and 1) imply both hooks run once per record.
  def test_pre_and_post
    path = File.join(File.dirname(__FILE__), "five_lines.txt")

    data = input(path) do
      @pre = @post = 0

      pre do
        @pre += 1
        @post = 0
      end
      read { |line| (@lines ||= []) << line }
      read(/\w+\./) { |number| (@numbers ||= []) << number }
      post { @post += 1 }
    end

    assert_equal(5, data.pre)
    assert_equal(1, data.post)
  end

  # Two larger scenarios combining the state directives.
  def test_complex
    path = File.join(File.dirname(__FILE__), "ross_report.txt")
    test = self

    # Scenario 1: a paged report.  Parsing starts in the skip state, resumes
    # skipping at each form feed, and leaves it at a dashed rule line; blank
    # records and records ending in "--" are dropped.
    input(path) do
      @state = :skip
      start_skipping_at("\f")
      stop_skipping_at(/\A-[- ]+-\Z/)
      skip(/\A\s*\Z/)
      skip(/--\Z/)

      find_in_skipped(/((?:Period|Week)\s+\d.+?)\s*\Z/) do |period|
        test.assert_equal("Period 02/2002", period)
      end

      stop_at("*** Selection Criteria ***")

      read do |line|
        test.assert_match(/\A\s+(?:Sales|Cust|SA)|\A[-\w]+\s+/, line)
      end
    end

    path = File.join(File.dirname(__FILE__), "car_ads.txt")

    # Scenario 2: paragraph-mode ads.  Price and mileage are reset before each
    # record, captured by the two pattern reads, and then used to filter the
    # record in the final bare read.
    data = input(path, "") do
      @state = :skip
      stop_skipping_at("Save Ad")
      skip(/\A\s*\Z/)

      pre { @price = @miles = nil }
      read(/\$([\d,]+\d)/) { |price| @price = price.delete(",").to_i }
      read(/([\d,]*\d)\s*m/) { |miles| @miles = miles.delete(",").to_i }

      read do |ad|
        if @price && @price < 20_000 && @miles && @miles < 40_000
          (@ads ||= []) << ad.strip
        end
      end
    end

    assert_equal([<<END_AD.strip], data.ads)
2003 Chrysler Town & Country LX
$16,990, green, 21,488 mi, air, pw, power locks, ps, power mirrors,
dual air bags, keyless entry, intermittent wipers, rear defroster, alloy,
pb, abs, cruise, am/fm stereo, CD, cassette, tinted glass
VIN:2C4GP44363R153238, Stock No:C153238, CALL DAN PERKINS AT 1-800-432-6326
END_AD
  end
end
|
data/test/ts_all.rb
ADDED
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.11
|
3
|
+
specification_version: 1
|
4
|
+
name: parseinput
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.0.1
|
7
|
+
date: 2005-08-18 00:00:00 -05:00
|
8
|
+
summary: Parse Input is a chain-saw tool for data mining.
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: james@grayproductions.net
|
12
|
+
homepage: http://input.rubyforge.org
|
13
|
+
rubyforge_project: input
|
14
|
+
description: "Parse Input is a library that aids in parsing generic input with Ruby. This
|
15
|
+
isn't intended to be a full-blown parser, but instead a chain-saw tool for data
|
16
|
+
mining arbitrary inputs quickly and easily."
|
17
|
+
autorequire: parse/input
|
18
|
+
default_executable:
|
19
|
+
bindir: bin
|
20
|
+
has_rdoc: false
|
21
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
22
|
+
requirements:
|
23
|
+
-
|
24
|
+
- ">"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.0.0
|
27
|
+
version:
|
28
|
+
platform: ruby
|
29
|
+
signing_key:
|
30
|
+
cert_chain:
|
31
|
+
authors:
|
32
|
+
- James Edward Gray II
|
33
|
+
files:
|
34
|
+
- lib/parse/input.rb
|
35
|
+
- test/tc_reading.rb
|
36
|
+
- test/tc_state.rb
|
37
|
+
- test/ts_all.rb
|
38
|
+
- Rakefile
|
39
|
+
- setup.rb
|
40
|
+
test_files:
|
41
|
+
- test/ts_all.rb
|
42
|
+
rdoc_options: []
|
43
|
+
extra_rdoc_files: []
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
requirements: []
|
47
|
+
dependencies: []
|