ariel 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +21 -0
- data/README +98 -0
- data/bin/ariel +56 -0
- data/examples/google_calculator/labeled/1 +43 -0
- data/examples/google_calculator/labeled/2 +41 -0
- data/examples/google_calculator/labeled/3 +41 -0
- data/examples/google_calculator/structure.rb +12 -0
- data/examples/google_calculator/structure.yaml +46 -0
- data/examples/google_calculator/unlabeled/1 +43 -0
- data/examples/google_calculator/unlabeled/2 +43 -0
- data/examples/raa/labeled/highline.html +135 -0
- data/examples/raa/labeled/mongrel.html +168 -0
- data/examples/raa/structure.rb +17 -0
- data/examples/raa/structure.yaml +183 -0
- data/examples/raa/unlabeled/pdf-writer.html +175 -0
- data/lib/ariel/candidate_selector.rb +94 -0
- data/lib/ariel/example_document_loader.rb +59 -0
- data/lib/ariel/extracted_node.rb +20 -0
- data/lib/ariel/label_utils.rb +71 -0
- data/lib/ariel/learner.rb +237 -0
- data/lib/ariel/node_like.rb +26 -0
- data/lib/ariel/rule.rb +112 -0
- data/lib/ariel/rule_set.rb +34 -0
- data/lib/ariel/structure_node.rb +75 -0
- data/lib/ariel/token.rb +68 -0
- data/lib/ariel/token_stream.rb +240 -0
- data/lib/ariel/wildcards.rb +33 -0
- data/lib/ariel.rb +69 -0
- data/test/ariel_test_case.rb +15 -0
- data/test/fixtures.rb +43 -0
- data/test/specs/token_spec.rb +65 -0
- data/test/specs/token_stream_spec.rb +43 -0
- data/test/specs/wildcards_spec.rb +26 -0
- data/test/test_candidate_selector.rb +58 -0
- data/test/test_example_document_loader.rb +7 -0
- data/test/test_label_utils.rb +15 -0
- data/test/test_learner.rb +38 -0
- data/test/test_rule.rb +38 -0
- data/test/test_structure_node.rb +81 -0
- data/test/test_token.rb +16 -0
- data/test/test_token_stream.rb +82 -0
- data/test/test_wildcards.rb +18 -0
- metadata +103 -0
@@ -0,0 +1,168 @@
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<base href="http://raa.ruby-lang.org/">
|
5
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
6
|
+
<meta name="Author" content="ruby-lang.org">
|
7
|
+
<meta http-equiv="content-style-type" content="text/css">
|
8
|
+
<link rev="made" href="mailto:raa-admin@ruby-lang.org">
|
9
|
+
<link rel="home" href="http://raa.ruby-lang.org/">
|
10
|
+
<link rel="index" href="index.html">
|
11
|
+
<link rel="search" href="search.rhtml">
|
12
|
+
<link rel="glossary" href="all.html">
|
13
|
+
<link rel="SHORTCUT ICON" href="/favicon.ico">
|
14
|
+
<link rel="stylesheet" href="raa.css" type="text/css" media="all">
|
15
|
+
|
16
|
+
<title>RAA - <l:name>mongrel</l:name></title>
|
17
|
+
</head>
|
18
|
+
|
19
|
+
<body>
|
20
|
+
<div class="header">
|
21
|
+
<h1>RAA - mongrel</h1>
|
22
|
+
</div>
|
23
|
+
|
24
|
+
<form method="get" action="search.rhtml">
|
25
|
+
<div class="header-searchbox">
|
26
|
+
<input name="search" type="text" size="20" maxlength="63"><input type="submit" value="Search"><br>
|
27
|
+
<a href="index.html#search">advanced search</a>
|
28
|
+
</div>
|
29
|
+
</form>
|
30
|
+
|
31
|
+
|
32
|
+
<p class="caption">
|
33
|
+
mongrel / <l:current_version>0.3.12</l:current_version>
|
34
|
+
</p>
|
35
|
+
|
36
|
+
<table class="entry">
|
37
|
+
|
38
|
+
<tr><th>Short description: </th>
|
39
|
+
<td><l:short_description>Fast HTTP 1.1 library and server for Ruby web applications.</l:short_description></td>
|
40
|
+
</tr>
|
41
|
+
<tr><th>Category: </th>
|
42
|
+
<td><l:category><a href="cat.rhtml?category_major=Library">Library</a>/<a href="cat.rhtml?category_major=Library;category_minor=Web">Web</a></l:category></td>
|
43
|
+
</tr>
|
44
|
+
<tr><th>Status: </th>
|
45
|
+
<td>beta</td>
|
46
|
+
</tr>
|
47
|
+
<tr><th>Created: </th>
|
48
|
+
<td>2006-02-12 21:12:33 GMT</td>
|
49
|
+
</tr>
|
50
|
+
<tr>
|
51
|
+
<th>Last update: </th>
|
52
|
+
<td>2006-03-30 10:42:09 GMT</td>
|
53
|
+
</tr>
|
54
|
+
<tr><th>Owner: </th>
|
55
|
+
<td><a href="mailto:zedshaw@zedshaw.com"><l:owner>Zed A. Shaw</l:owner></a>
|
56
|
+
(<a href="owner.rhtml?id=1821">Projects of this owner</a>)</td>
|
57
|
+
</tr>
|
58
|
+
<tr><th>Homepage: </th>
|
59
|
+
<td><a href="http://mongrel.rubyforge.org/"><l:homepage>http://mongrel.rubyforge.org/</l:homepage></a></td>
|
60
|
+
</tr>
|
61
|
+
<tr><th>Download: </th>
|
62
|
+
<td>
|
63
|
+
<a href="http://rubyforge.org/frs/?group_id=1306">http://rubyforge.org/frs/?group_id=1306</a>
|
64
|
+
|
65
|
+
</td>
|
66
|
+
</tr>
|
67
|
+
|
68
|
+
<tr><th>License: </th>
|
69
|
+
<td><l:license>LGPL</l:license></td>
|
70
|
+
</tr>
|
71
|
+
<tr><th>Dependency: </th>
|
72
|
+
<td colspan='5'>
|
73
|
+
<table>
|
74
|
+
|
75
|
+
<tr width="100%">
|
76
|
+
<td nowrap>Requires:</td>
|
77
|
+
<td nowrap>
|
78
|
+
|
79
|
+
<a href="project/daemons/0.4.2">daemons/0.4.2</a>(*)
|
80
|
+
|
81
|
+
</td>
|
82
|
+
<td width="100%">Used for Ruby on Rails support</td>
|
83
|
+
</tr>
|
84
|
+
|
85
|
+
<tr width="100%">
|
86
|
+
<td nowrap>Requires:</td>
|
87
|
+
<td nowrap>
|
88
|
+
|
89
|
+
<a href="project/camping/">camping/1.2</a>(+)
|
90
|
+
|
91
|
+
</td>
|
92
|
+
<td width="100%">Optional if you want to run the examples for Camping.</td>
|
93
|
+
</tr>
|
94
|
+
|
95
|
+
<tr width="100%">
|
96
|
+
<td nowrap>Requires:</td>
|
97
|
+
<td nowrap>
|
98
|
+
|
99
|
+
<a href="project/rails/">rails/1.0</a>(+)
|
100
|
+
|
101
|
+
</td>
|
102
|
+
<td width="100%">Optional if you want the mongrel_rails runner.</td>
|
103
|
+
</tr>
|
104
|
+
|
105
|
+
<tr width="100%">
|
106
|
+
<td nowrap>Requires:</td>
|
107
|
+
<td nowrap>
|
108
|
+
|
109
|
+
<a href="project/win32-service/0.5.0">win32-service/0.5.0</a>(*)
|
110
|
+
|
111
|
+
</td>
|
112
|
+
<td width="100%">For win32-service stuff. Mongrel project provides a gem.</td>
|
113
|
+
</tr>
|
114
|
+
|
115
|
+
<tr><td colspan="3">(*): newer version exists</td></tr>
|
116
|
+
|
117
|
+
<tr><td colspan="3">(+): no such version</td></tr>
|
118
|
+
|
119
|
+
</table>
|
120
|
+
</td>
|
121
|
+
</tr>
|
122
|
+
<tr><th>Description: </th>
|
123
|
+
<td><p>Mongrel is a fast HTTP library and server for Ruby that is intended for hosting Ruby web applications of any kind using plain HTTP rather than FastCGI or SCGI. It is framework agnostic and already supports Ruby On Rails, Og+Nitro, and Camping frameworks. Runs on any POSIX system and Win32 (including service support under Win32). It also supports a complete plugin system based on RubyGems called GemPlugins.</p>
|
124
|
+
</td>
|
125
|
+
</tr>
|
126
|
+
|
127
|
+
<tr><th>Versions: </th>
|
128
|
+
<td>
|
129
|
+
<l:version_history>[<a href="project/mongrel/0.3.12">0.3.12</a> (2006-03-30)]
|
130
|
+
|
131
|
+
[<a href="project/mongrel/0.3.11">0.3.11</a> (2006-03-15)]
|
132
|
+
|
133
|
+
[<a href="project/mongrel/0.3.10">0.3.10</a> (2006-03-12)]
|
134
|
+
|
135
|
+
[<a href="project/mongrel/0.3.9">0.3.9</a> (2006-03-06)]
|
136
|
+
|
137
|
+
[<a href="project/mongrel/0.3.8">0.3.8</a> (2006-03-04)]
|
138
|
+
|
139
|
+
[<a href="project/mongrel/0.3.6">0.3.6</a> (2006-02-23)]
|
140
|
+
|
141
|
+
[<a href="project/mongrel/0.3.2">0.3.2</a> (2006-02-13)]
|
142
|
+
|
143
|
+
[<a href="project/mongrel/0.3.1">0.3.1</a> (2006-02-12)]</l:version_history>
|
144
|
+
|
145
|
+
</td>
|
146
|
+
</tr>
|
147
|
+
|
148
|
+
</table>
|
149
|
+
|
150
|
+
<p class="caption">
|
151
|
+
<a href="list.rhtml?name=mongrel">Edit this project (for project owner)</a>
|
152
|
+
</p>
|
153
|
+
|
154
|
+
<p class="caption">
|
155
|
+
<a href="index.html">back to RAA top</a>
|
156
|
+
</p>
|
157
|
+
|
158
|
+
<div class="footer">
|
159
|
+
<hr>
|
160
|
+
<address>
|
161
|
+
For all questions or comments, or if you have any inquiries about this page, contact <a title="Send Feedback for RAA" href="mailto:raa-admin@ruby-lang.org">raa-admin@ruby-lang.org</a>.
|
162
|
+
</address>
|
163
|
+
</div>
|
164
|
+
|
165
|
+
</body>
|
166
|
+
</html>
|
167
|
+
|
168
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'ariel'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Labels used in the RAA example pages (they appear there as <l:NAME> tags).
# Each one is registered as an item on the root structure node below.
item_names = [
  :name,
  :current_version,
  :short_description,
  :category,
  :owner,
  :homepage,
  :license,
  :version_history
]

# Build the structure tree: one item per label, in the order listed above.
structure = Ariel::StructureNode.new do |root|
  item_names.each { |item_name| root.item item_name }
end

# Serialise the structure to structure.yaml in the current working directory.
File.open('structure.yaml', 'wb') { |out| YAML.dump(structure, out) }
|
@@ -0,0 +1,183 @@
|
|
1
|
+
--- &id001 !ruby/object:Ariel::StructureNode
|
2
|
+
children:
|
3
|
+
:version_history: !ruby/object:Ariel::StructureNode
|
4
|
+
children: {}
|
5
|
+
|
6
|
+
meta: !ruby/object:OpenStruct
|
7
|
+
table:
|
8
|
+
:name: :version_history
|
9
|
+
:node_type: :not_list
|
10
|
+
parent: *id001
|
11
|
+
ruleset: !ruby/object:Ariel::RuleSet
|
12
|
+
end_rules:
|
13
|
+
- !ruby/object:Ariel::Rule
|
14
|
+
direction: :back
|
15
|
+
landmarks:
|
16
|
+
- - </td>
|
17
|
+
start_rules:
|
18
|
+
- !ruby/object:Ariel::Rule
|
19
|
+
direction: :forward
|
20
|
+
landmarks:
|
21
|
+
- - <td>
|
22
|
+
- - Versions
|
23
|
+
- - <td>
|
24
|
+
:short_description: !ruby/object:Ariel::StructureNode
|
25
|
+
children: {}
|
26
|
+
|
27
|
+
meta: !ruby/object:OpenStruct
|
28
|
+
table:
|
29
|
+
:name: :short_description
|
30
|
+
:node_type: :not_list
|
31
|
+
parent: *id001
|
32
|
+
ruleset: !ruby/object:Ariel::RuleSet
|
33
|
+
end_rules:
|
34
|
+
- !ruby/object:Ariel::Rule
|
35
|
+
direction: :back
|
36
|
+
landmarks:
|
37
|
+
- - </td>
|
38
|
+
- - Category
|
39
|
+
- - </td>
|
40
|
+
start_rules:
|
41
|
+
- !ruby/object:Ariel::Rule
|
42
|
+
direction: :forward
|
43
|
+
landmarks:
|
44
|
+
- - <td>
|
45
|
+
:current_version: !ruby/object:Ariel::StructureNode
|
46
|
+
children: {}
|
47
|
+
|
48
|
+
meta: !ruby/object:OpenStruct
|
49
|
+
table:
|
50
|
+
:name: :current_version
|
51
|
+
:node_type: :not_list
|
52
|
+
parent: *id001
|
53
|
+
ruleset: !ruby/object:Ariel::RuleSet
|
54
|
+
end_rules:
|
55
|
+
- !ruby/object:Ariel::Rule
|
56
|
+
direction: :back
|
57
|
+
landmarks:
|
58
|
+
- - </p>
|
59
|
+
- - table
|
60
|
+
- - </p>
|
61
|
+
start_rules:
|
62
|
+
- !ruby/object:Ariel::Rule
|
63
|
+
direction: :forward
|
64
|
+
landmarks:
|
65
|
+
- - /
|
66
|
+
- - caption
|
67
|
+
- - /
|
68
|
+
:homepage: !ruby/object:Ariel::StructureNode
|
69
|
+
children: {}
|
70
|
+
|
71
|
+
meta: !ruby/object:OpenStruct
|
72
|
+
table:
|
73
|
+
:name: :homepage
|
74
|
+
:node_type: :not_list
|
75
|
+
parent: *id001
|
76
|
+
ruleset: !ruby/object:Ariel::RuleSet
|
77
|
+
end_rules:
|
78
|
+
- !ruby/object:Ariel::Rule
|
79
|
+
direction: :back
|
80
|
+
landmarks:
|
81
|
+
- - </a>
|
82
|
+
- - Download
|
83
|
+
- - </a>
|
84
|
+
start_rules:
|
85
|
+
- !ruby/object:Ariel::Rule
|
86
|
+
direction: :forward
|
87
|
+
landmarks:
|
88
|
+
- - ">"
|
89
|
+
- - rubyforge
|
90
|
+
- - ">"
|
91
|
+
:category: !ruby/object:Ariel::StructureNode
|
92
|
+
children: {}
|
93
|
+
|
94
|
+
meta: !ruby/object:OpenStruct
|
95
|
+
table:
|
96
|
+
:name: :category
|
97
|
+
:node_type: :not_list
|
98
|
+
parent: *id001
|
99
|
+
ruleset: !ruby/object:Ariel::RuleSet
|
100
|
+
end_rules:
|
101
|
+
- !ruby/object:Ariel::Rule
|
102
|
+
direction: :back
|
103
|
+
landmarks:
|
104
|
+
- - </td>
|
105
|
+
- - Status
|
106
|
+
- - </td>
|
107
|
+
start_rules:
|
108
|
+
- !ruby/object:Ariel::Rule
|
109
|
+
direction: :forward
|
110
|
+
landmarks:
|
111
|
+
- - <td>
|
112
|
+
- - <td>
|
113
|
+
:name: !ruby/object:Ariel::StructureNode
|
114
|
+
children: {}
|
115
|
+
|
116
|
+
meta: !ruby/object:OpenStruct
|
117
|
+
table:
|
118
|
+
:name: :name
|
119
|
+
:node_type: :not_list
|
120
|
+
parent: *id001
|
121
|
+
ruleset: !ruby/object:Ariel::RuleSet
|
122
|
+
end_rules:
|
123
|
+
- !ruby/object:Ariel::Rule
|
124
|
+
direction: :back
|
125
|
+
landmarks:
|
126
|
+
- - </title>
|
127
|
+
start_rules:
|
128
|
+
- !ruby/object:Ariel::Rule
|
129
|
+
direction: :forward
|
130
|
+
landmarks:
|
131
|
+
- - "-"
|
132
|
+
- - RAA
|
133
|
+
- "-"
|
134
|
+
:owner: !ruby/object:Ariel::StructureNode
|
135
|
+
children: {}
|
136
|
+
|
137
|
+
meta: !ruby/object:OpenStruct
|
138
|
+
table:
|
139
|
+
:name: :owner
|
140
|
+
:node_type: :not_list
|
141
|
+
parent: *id001
|
142
|
+
ruleset: !ruby/object:Ariel::RuleSet
|
143
|
+
end_rules:
|
144
|
+
- !ruby/object:Ariel::Rule
|
145
|
+
direction: :back
|
146
|
+
landmarks:
|
147
|
+
- - </a>
|
148
|
+
- - id
|
149
|
+
- - </a>
|
150
|
+
start_rules:
|
151
|
+
- !ruby/object:Ariel::Rule
|
152
|
+
direction: :forward
|
153
|
+
landmarks:
|
154
|
+
- - ">"
|
155
|
+
- - Owner
|
156
|
+
- - ">"
|
157
|
+
:license: !ruby/object:Ariel::StructureNode
|
158
|
+
children: {}
|
159
|
+
|
160
|
+
meta: !ruby/object:OpenStruct
|
161
|
+
table:
|
162
|
+
:name: :license
|
163
|
+
:node_type: :not_list
|
164
|
+
parent: *id001
|
165
|
+
ruleset: !ruby/object:Ariel::RuleSet
|
166
|
+
end_rules:
|
167
|
+
- !ruby/object:Ariel::Rule
|
168
|
+
direction: :back
|
169
|
+
landmarks:
|
170
|
+
- - </td>
|
171
|
+
- - Dependency
|
172
|
+
- - </td>
|
173
|
+
start_rules:
|
174
|
+
- !ruby/object:Ariel::Rule
|
175
|
+
direction: :forward
|
176
|
+
landmarks:
|
177
|
+
- - <td>
|
178
|
+
- - License
|
179
|
+
- - <td>
|
180
|
+
meta: !ruby/object:OpenStruct
|
181
|
+
table:
|
182
|
+
:name: :root
|
183
|
+
:node_type: :not_list
|
@@ -0,0 +1,175 @@
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<base href="http://raa.ruby-lang.org/">
|
5
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
6
|
+
<meta name="Author" content="ruby-lang.org">
|
7
|
+
<meta http-equiv="content-style-type" content="text/css">
|
8
|
+
<link rev="made" href="mailto:raa-admin@ruby-lang.org">
|
9
|
+
<link rel="home" href="http://raa.ruby-lang.org/">
|
10
|
+
<link rel="index" href="index.html">
|
11
|
+
<link rel="search" href="search.rhtml">
|
12
|
+
<link rel="glossary" href="all.html">
|
13
|
+
<link rel="SHORTCUT ICON" href="/favicon.ico">
|
14
|
+
<link rel="stylesheet" href="raa.css" type="text/css" media="all">
|
15
|
+
|
16
|
+
<title>RAA - pdf-writer</title>
|
17
|
+
</head>
|
18
|
+
|
19
|
+
<body>
|
20
|
+
<div class="header">
|
21
|
+
<h1>RAA - pdf-writer</h1>
|
22
|
+
</div>
|
23
|
+
|
24
|
+
<form method="get" action="search.rhtml">
|
25
|
+
<div class="header-searchbox">
|
26
|
+
<input name="search" type="text" size="20" maxlength="63"><input type="submit" value="Search"><br>
|
27
|
+
<a href="index.html#search">advanced search</a>
|
28
|
+
</div>
|
29
|
+
</form>
|
30
|
+
|
31
|
+
|
32
|
+
<p class="caption">
|
33
|
+
pdf-writer / 1.1.3
|
34
|
+
</p>
|
35
|
+
|
36
|
+
<table class="entry">
|
37
|
+
|
38
|
+
<tr><th>Short description: </th>
|
39
|
+
<td>Native Ruby PDF Document Writer</td>
|
40
|
+
</tr>
|
41
|
+
<tr><th>Category: </th>
|
42
|
+
<td><a href="cat.rhtml?category_major=Library">Library</a>/<a href="cat.rhtml?category_major=Library;category_minor=Document">Document</a></td>
|
43
|
+
</tr>
|
44
|
+
<tr><th>Status: </th>
|
45
|
+
<td>Stable</td>
|
46
|
+
</tr>
|
47
|
+
<tr><th>Created: </th>
|
48
|
+
<td>2003-09-17 03:59:34 GMT</td>
|
49
|
+
</tr>
|
50
|
+
<tr>
|
51
|
+
<th>Last update: </th>
|
52
|
+
<td>2005-09-10 02:12:20 GMT</td>
|
53
|
+
</tr>
|
54
|
+
<tr><th>Owner: </th>
|
55
|
+
<td><a href="mailto:austin+raa@halostatue.ca">Austin Ziegler</a>
|
56
|
+
(<a href="owner.rhtml?id=788">Projects of this owner</a>)</td>
|
57
|
+
</tr>
|
58
|
+
<tr><th>Homepage: </th>
|
59
|
+
<td><a href="http://ruby-pdf.rubyforge.org/pdf-writer/">http://ruby-pdf.rubyforge.org/pdf-writer/</a></td>
|
60
|
+
</tr>
|
61
|
+
<tr><th>Download: </th>
|
62
|
+
<td>
|
63
|
+
<a href="http://rubyforge.org/frs/?group_id=81">http://rubyforge.org/frs/?group_id=81</a>
|
64
|
+
|
65
|
+
</td>
|
66
|
+
</tr>
|
67
|
+
|
68
|
+
<tr><th>License: </th>
|
69
|
+
<td>BSD-type</td>
|
70
|
+
</tr>
|
71
|
+
<tr><th>Dependency: </th>
|
72
|
+
<td colspan='5'>
|
73
|
+
<table>
|
74
|
+
|
75
|
+
<tr width="100%">
|
76
|
+
<td nowrap>Requires:</td>
|
77
|
+
<td nowrap>
|
78
|
+
|
79
|
+
<a href="project/trans-simple/1.3.0">trans-simple/1.3.0</a>
|
80
|
+
|
81
|
+
</td>
|
82
|
+
<td width="100%">Transaction::Simple</td>
|
83
|
+
</tr>
|
84
|
+
|
85
|
+
<tr width="100%">
|
86
|
+
<td nowrap>Requires:</td>
|
87
|
+
<td nowrap>
|
88
|
+
|
89
|
+
<a href="project/color-tools/1.3.0">color-tools/1.3.0</a>
|
90
|
+
|
91
|
+
</td>
|
92
|
+
<td width="100%">color-tools</td>
|
93
|
+
</tr>
|
94
|
+
|
95
|
+
</table>
|
96
|
+
</td>
|
97
|
+
</tr>
|
98
|
+
<tr><th>Description: </th>
|
99
|
+
<td><h1>PDF::Writer for Ruby</h1>
|
100
|
+
<p>
|
101
|
+
This library provides the ability to create PDF documents using only native
|
102
|
+
Ruby libraries. There are several demo programs available in the demo/
|
103
|
+
directory. The canonical documentation for PDF::Writer is
|
104
|
+
"manual.pdf", which can be generated using bin/techbook (just
|
105
|
+
"techbook" for RubyGem users) and the manual file
|
106
|
+
"manual.pwd".
|
107
|
+
</p>
|
108
|
+
<table>
|
109
|
+
<tr><td valign="top">Homepage:</td><td><a
|
110
|
+
href="http://rubyforge.org/projects/ruby-pdf">rubyforge.org/projects/ruby-pdf</a>/
|
111
|
+
|
112
|
+
</td></tr>
|
113
|
+
<tr><td valign="top">Copyright:</td><td>2003—2005, Austin Ziegler
|
114
|
+
|
115
|
+
</td></tr>
|
116
|
+
</table>
|
117
|
+
<p>
|
118
|
+
This software is based on Adobe’s PDF Reference, Fifth Edition,
|
119
|
+
version 1.6. This and earlier editions are available from Adobe’s PDF
|
120
|
+
developer <a
|
121
|
+
href="http://partners.adobe.com/public/developer/pdf/index_reference.html">website</a>.
|
122
|
+
</p>
|
123
|
+
<h2>LICENCE NOTES</h2>
|
124
|
+
<p>
|
125
|
+
Please read the file LICENCE for licensing restrictions on this library, as
|
126
|
+
well as important patent considerations.
|
127
|
+
</p>
|
128
|
+
<h2>Requirements</h2>
|
129
|
+
<p>
|
130
|
+
PDF::Writer requires Ruby 1.8.2 or better, color-tools 1.3.0 or better, and
|
131
|
+
Transaction::Simple 1.3.0 or better.
|
132
|
+
</p>
|
133
|
+
</td>
|
134
|
+
</tr>
|
135
|
+
|
136
|
+
<tr><th>Versions: </th>
|
137
|
+
<td>
|
138
|
+
[<a href="project/pdf-writer/1.1.3">1.1.3</a> (2005-09-10)]
|
139
|
+
|
140
|
+
[<a href="project/pdf-writer/1.1.2">1.1.2</a> (2005-08-25)]
|
141
|
+
|
142
|
+
[<a href="project/pdf-writer/1.1.1">1.1.1</a> (2005-07-01)]
|
143
|
+
|
144
|
+
[<a href="project/pdf-writer/1.1.0">1.1.0</a> (2005-06-30)]
|
145
|
+
|
146
|
+
[<a href="project/pdf-writer/1.0.1">1.0.1</a> (2005-06-13)]
|
147
|
+
|
148
|
+
[<a href="project/pdf-writer/1.0.0">1.0.0</a> (2005-06-13)]
|
149
|
+
|
150
|
+
[<a href="project/pdf-writer/Technology%20Preview">Technology Preview</a> (2004-06-14)]
|
151
|
+
|
152
|
+
</td>
|
153
|
+
</tr>
|
154
|
+
|
155
|
+
</table>
|
156
|
+
|
157
|
+
<p class="caption">
|
158
|
+
<a href="list.rhtml?name=pdf-writer">Edit this project (for project owner)</a>
|
159
|
+
</p>
|
160
|
+
|
161
|
+
<p class="caption">
|
162
|
+
<a href="index.html">back to RAA top</a>
|
163
|
+
</p>
|
164
|
+
|
165
|
+
<div class="footer">
|
166
|
+
<hr>
|
167
|
+
<address>
|
168
|
+
For all questions or comments, or if you have any inquiries about this page, contact <a title="Send Feedback for RAA" href="mailto:raa-admin@ruby-lang.org">raa-admin@ruby-lang.org</a>.
|
169
|
+
</address>
|
170
|
+
</div>
|
171
|
+
|
172
|
+
</body>
|
173
|
+
</html>
|
174
|
+
|
175
|
+
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Ariel

  # Given an array of candidate Rules and an array of labeled example streams,
  # applies heuristics to narrow the candidates down to the ideal Rule. Every
  # select_* method replaces the internal candidates array with the surviving
  # subset and returns that subset.
  class CandidateSelector

    attr_accessor :candidates

    # candidates:: array of Rule-like objects. It is duplicated so that the
    #              caller's array is never modified by the selector.
    # examples::   the examples each rule is scored against.
    def initialize(candidates, examples)
      @candidates = candidates.dup
      @examples = examples
    end

    # Keeps the Rule candidates that have the most matches of the given types
    # against the examples, e.g. select_best_by_match_type(:early, :perfect)
    # keeps the rules with the most matches that are early or perfect.
    # A single remaining candidate is returned without being scored.
    def select_best_by_match_type(*match_types)
      debug "Selecting best by match types #{match_types}"
      return @candidates if @candidates.size == 1
      @candidates = highest_scoring_by do |rule|
        rule_score = 0
        @examples.each do |example|
          rule_score += 1 if rule.matches(example, *match_types)
        end
        rule_score
      end
      return @candidates
    end

    # All scoring functions use this indirectly. Yields every Rule candidate to
    # the given block, which is expected to return that rule's score, and
    # returns a hash of candidate_index => score pairs.
    def score_by
      score_hash = {}
      @candidates.each_with_index do |rule, index|
        score_hash[index] = yield rule
      end
      return score_hash
    end

    # Takes a scoring function as a block and yields each rule to it exactly
    # once. Returns an array (in candidate order) of the Rule candidates that
    # share the highest score; returns [] when there are no candidates.
    def highest_scoring_by(&scorer)
      score_hash = score_by(&scorer)
      best_score = score_hash.values.max
      highest_scorers = []
      @candidates.each_with_index do |rule, index|
        highest_scorers << rule if score_hash[index] == best_score
      end
      debug "#{highest_scorers.size} highest_scorers were found, with a score of #{best_score}"
      return highest_scorers
    end

    # Keeps the rules with the fewest wildcards. The count is negated so that
    # "fewest" becomes "highest score".
    def select_with_fewer_wildcards
      debug "Selecting the rules with the fewest wildcards"
      @candidates = highest_scoring_by { |rule| -rule.wildcard_count }
      return @candidates
    end

    # Keeps the rules whose match position is, on average, closest to each
    # example's label_index. Examples on which the rule does not match
    # (apply_to returns nil) are left out of the mean.
    # NOTE(review): a rule that matches no example at all keeps a distance of 0
    # and therefore ties with a rule that always hits the label exactly. This
    # pre-existing behaviour is preserved here; confirm that callers only pass
    # rules that match at least one example.
    def select_closest_to_label
      debug "Selecting rules that match the examples closest to the label"
      @candidates = highest_scoring_by do |rule|
        rule_score = 0
        matched_examples = 0
        @examples.each do |example|
          match_index = rule.apply_to(example)
          next if match_index.nil?
          rule_score += (example.label_index - match_index).abs
          matched_examples += 1
        end
        # Mean distance from label_index over the examples that matched.
        rule_score = rule_score.to_f / matched_examples unless matched_examples == 0
        -rule_score # Negated so that highest scoring = closest to label index.
      end
      return @candidates
    end

    # Keeps the rules whose final landmark is the longest. A rule without any
    # landmarks scores 0 instead of nil, so it can be compared with the other
    # candidates rather than making the score comparison raise.
    def select_with_longer_end_landmarks
      debug "Selecting rules that have longer end landmarks"
      @candidates = highest_scoring_by do |rule|
        end_landmark = rule.landmarks.last
        end_landmark.nil? ? 0 : end_landmark.size
      end
    end

    # Returns a random candidate. Meant for making the final choice in case
    # previous selections have still left multiple candidates. Returns nil
    # when no candidates remain.
    def random_from_remaining
      debug "Selecting random from last #{candidates.size} candidate rules"
      @candidates.sort_by { rand }.first
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Ariel

  # Provides methods that read example documents, using a StructureNode tree
  # to populate a tree of extracted nodes for each labeled example, and then
  # learn start/end rules for every structure node from those examples.
  # TODO: Fix the UTF issues this implementation is bound to create.
  class ExampleDocumentLoader

    # Loads one labeled example and files every extracted node (except the
    # root) under its structure node in +loaded_example_hash+.
    #
    # file::                an object responding to #read, or the document text itself.
    # structure::           the root StructureNode; must not have a parent.
    # loaded_example_hash:: hash of structure node => array of extracted nodes.
    #                       Values must already support <<, e.g. a hash built
    #                       with an array-creating default block.
    #
    # Returns +loaded_example_hash+. Raises ArgumentError if +structure+ is not
    # a root node.
    def self.load_labeled_example(file, structure, loaded_example_hash)
      raise ArgumentError, "Passed structure is not root parent" if structure.parent
      string = file.respond_to?(:read) ? file.read : file
      tokenstream = TokenStream.new
      # NOTE(review): the +true+ flag presumably tells the tokenizer that the
      # text contains label tags - confirm against TokenStream#tokenize.
      tokenstream.tokenize(string, true)
      root = ExtractedNode.new(:root, tokenstream, structure)
      structure.apply_extraction_tree_on(root, true)
      root.each_descendant(true) do |extracted_node|
        # Nodes without a parent (the root) are not examples of any child structure.
        if extracted_node.parent
          loaded_example_hash[extracted_node.meta.structure] << extracted_node
        end
        extracted_node.tokenstream.remove_label_tags
      end
      return loaded_example_hash
    end

    # For every structure node in +loaded_example_hash+, builds the start and
    # end examples from its extracted nodes, learns a forward and a back rule
    # list, and stores them on the structure node as a RuleSet.
    # The +structure+ argument is not used by this method.
    def self.supervise_learning(structure, loaded_example_hash)
      loaded_example_hash.each_pair do |structure_node, example_nodes|
        start_examples = []
        end_examples = []
        example_nodes.each do |node|
          # Rules are based on extracting from the parent, so the examples are
          # the parent's token stream labeled where this node begins/ends.
          start_tstream = node.parent.tokenstream
          start_tstream.set_label_at(node.tokenstream.tokens.first.start_loc)
          start_examples << start_tstream
          end_tstream = node.parent.tokenstream.reverse
          end_tstream.set_label_at(node.tokenstream.tokens.last.start_loc)
          end_examples << end_tstream
        end
        learner = Learner.new(*start_examples)
        start_rules = learner.learn_rule :forward
        learner = Learner.new(*end_examples)
        end_rules = learner.learn_rule :back
        structure_node.ruleset = RuleSet.new(start_rules, end_rules)
      end
    end

    # Loads every labeled example found directly inside +dir+, learns rules
    # from them and returns +structure+ with the learnt rulesets attached.
    # Entries whose path ends in structure.rb and entries that are not regular
    # files (e.g. sub-directories, which cannot be read as documents) are
    # skipped.
    def self.load_directory(dir, structure)
      loaded_example_hash = Hash.new { |h, k| h[k] = [] }
      Dir.glob("#{dir}/*") do |doc|
        next if doc =~ /structure\.rb\z/
        next unless File.file?(doc)
        File.open(doc) do |file|
          self.load_labeled_example(file, structure, loaded_example_hash)
        end
      end
      self.supervise_learning structure, loaded_example_hash
      return structure
    end

  end
end
|