anystyle-cli 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +79 -9
- data/bin/anystyle +91 -0
- data/lib/anystyle/cli.rb +2 -0
- data/lib/anystyle/cli/commands/base.rb +11 -3
- data/lib/anystyle/cli/commands/check.rb +41 -0
- data/lib/anystyle/cli/commands/find.rb +2 -2
- data/lib/anystyle/cli/commands/train.rb +41 -0
- data/lib/anystyle/cli/version.rb +1 -1
- metadata +7 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c6c4e6580d9495afd965e3401f030cee94925747
|
4
|
+
data.tar.gz: 56e3842f3b3db05ea1a7a5b17ab4f72b5ff07773
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 01b90daac809b52ac69e70550952596512ef8df0bdc5503faabf4a173e83d4bf045dd2b0fcf4ae0bb28b165b182885e6f997f5f6f764638ed7b19dda67a1a87d
|
7
|
+
data.tar.gz: be29161a8a6ba925f2f21a4d50ad1cb41c93d945977cc3cdf026b58383e3c1d0f979c44966cb32934c1ce2d24d0bbba7751e8b9f771927c9c43d2eb6b8385546
|
data/README.md
CHANGED
@@ -10,22 +10,26 @@ anystyle --help
|
|
10
10
|
anystyle [global options] command [command options] [arguments...]
|
11
11
|
|
12
12
|
VERSION
|
13
|
-
1.
|
13
|
+
1.1.0 (cli 1.0.2, data 1.2.0)
|
14
14
|
|
15
15
|
GLOBAL OPTIONS
|
16
|
-
--
|
17
|
-
-
|
18
|
-
--
|
19
|
-
--
|
20
|
-
--
|
21
|
-
--
|
22
|
-
|
16
|
+
-F, --finder-model=file - Set the finder model file (default: none)
|
17
|
+
-P, --parser-model=file - Set the parser model file (default: none)
|
18
|
+
--adapter=name - Set the dictionary adapter (default: ruby)
|
19
|
+
-f, --format=name - Set the output format (default: ["json"])
|
20
|
+
--help - Show this message
|
21
|
+
--[no-]stdout - Print results directly to stdout
|
22
|
+
--[no-]verbose - Print status messages to stderr
|
23
|
+
--version - Display the program version
|
24
|
+
-w, --[no-]overwrite - Allow overwriting existing files
|
23
25
|
|
24
26
|
COMMANDS
|
27
|
+
check - Check tagged documents or references
|
25
28
|
find - Find and extract references from text documents
|
26
29
|
help - Shows a list of commands or help for one command
|
27
30
|
license - Print license information
|
28
31
|
parse - Parse and convert references
|
32
|
+
train - Create a new finder or parser model
|
29
33
|
|
30
34
|
anystyle help find
|
31
35
|
------------------
|
@@ -57,7 +61,6 @@ anystyle help find
|
|
57
61
|
|
58
62
|
Analyzing PDF documents currently depends on `pdftotext' which must be
|
59
63
|
installed separately.
|
60
|
-
|
61
64
|
EXAMPLES
|
62
65
|
anystyle -f csl,xml find thesis.pdf
|
63
66
|
|
@@ -70,6 +73,24 @@ anystyle help find
|
|
70
73
|
if your document uses a multi-column layout) and save them in BibTeX in
|
71
74
|
`./bib/thesis.bib'.
|
72
75
|
|
76
|
+
anystyle find --crop 72 thesis.pdf -
|
77
|
+
|
78
|
+
Extract references from `thesis.pdf' cropping away one inch (72pt) from
|
79
|
+
each page border and print the results to STDOUT.
|
80
|
+
|
81
|
+
anystyle find --crop 72,28 thesis.pdf -
|
82
|
+
|
83
|
+
Extract references from `thesis.pdf' cropping away one inch (72pt) from
|
84
|
+
each page's left and right border, approx. 1cm (28pt) from the top
|
85
|
+
and bottom.
|
86
|
+
|
87
|
+
|
88
|
+
COMMAND OPTIONS
|
89
|
+
-C, --crop=pt - Set cropping boundary for text extraction (default: none)
|
90
|
+
--[no-]layout - Use layout mode for PDF text extraction (default: enabled)
|
91
|
+
--[no-]solo - Include references outside of reference sections
|
92
|
+
|
93
|
+
|
73
94
|
anystyle help parse
|
74
95
|
-------------------
|
75
96
|
COMMAND OPTIONS
|
@@ -111,6 +132,55 @@ anystyle help parse
|
|
111
132
|
|
112
133
|
Extract references from `input.txt' and print them to STDOUT in CSL/JSON.
|
113
134
|
|
135
|
+
|
136
|
+
anystyle help check
|
137
|
+
-------------------
|
138
|
+
NAME
|
139
|
+
check - Check tagged documents or references
|
140
|
+
|
141
|
+
SYNOPSIS
|
142
|
+
anystyle [global options] check input
|
143
|
+
|
144
|
+
DESCRIPTION
|
145
|
+
This manual page documents the AnyStyle `check' command. AnyStyle `check'
|
146
|
+
analyzes tagged text documents or references.
|
147
|
+
|
148
|
+
The input argument can be a single TTX or XML document, or a folder
|
149
|
+
containing multiple documents.
|
150
|
+
|
151
|
+
AnyStyle `check' supports the following input formats:
|
152
|
+
ttx Tagged document format, used for training the finder model;
|
153
|
+
xml References only, XML, suitable for training the parser model.
|
154
|
+
|
155
|
+
EXAMPLES
|
156
|
+
anystyle check training-data.xml
|
157
|
+
|
158
|
+
Checks all references in the XML file and prints a report to STDOUT.
|
159
|
+
|
160
|
+
|
161
|
+
anystyle help train
|
162
|
+
-------------------
|
163
|
+
NAME
|
164
|
+
train - Create a new finder or parser model
|
165
|
+
|
166
|
+
SYNOPSIS
|
167
|
+
anystyle [global options] train input [output]
|
168
|
+
|
169
|
+
DESCRIPTION
|
170
|
+
This manual page documents the AnyStyle `train' command. AnyStyle `train'
|
171
|
+
creates a new finder or parser model based on the supplied training sets.
|
172
|
+
|
173
|
+
The input argument can be an XML document, or a folder containing multiple
|
174
|
+
TTX documents.
|
175
|
+
|
176
|
+
EXAMPLES
|
177
|
+
anystyle train data.xml my-model.mod
|
178
|
+
|
179
|
+
Creates a new parser model based on the XML training set and saves it
|
180
|
+
as `my-model.mod'. To use your model use the global `--finder-model'
|
181
|
+
or `--parser-model' flags.
|
182
|
+
|
183
|
+
|
114
184
|
License
|
115
185
|
-------
|
116
186
|
Copyright 2011-2018 Sylvester Keil. All rights reserved.
|
data/bin/anystyle
CHANGED
@@ -30,6 +30,14 @@ switch ['w', 'overwrite'],
|
|
30
30
|
switch 'stdout',
|
31
31
|
desc: 'Print results directly to stdout'
|
32
32
|
|
33
|
+
flag ['F', 'finder-model'],
|
34
|
+
arg_name: 'file',
|
35
|
+
desc: 'Set the finder model file'
|
36
|
+
|
37
|
+
flag ['P', 'parser-model'],
|
38
|
+
arg_name: 'file',
|
39
|
+
desc: 'Set the parser model file'
|
40
|
+
|
33
41
|
flag 'adapter',
|
34
42
|
default_value: 'ruby',
|
35
43
|
arg_name: 'name',
|
@@ -46,6 +54,18 @@ flag ['f', 'format'],
|
|
46
54
|
|
47
55
|
pre do |opts|
|
48
56
|
AnyStyle::Dictionary.defaults[:adapter] = opts[:adapter]
|
57
|
+
|
58
|
+
unless opts[:'finder-model'].nil?
|
59
|
+
AnyStyle::Finder.defaults[:model] =
|
60
|
+
File.expand_path(opts[:'finder-model']).untaint
|
61
|
+
end
|
62
|
+
|
63
|
+
unless opts[:'parser-model'].nil?
|
64
|
+
AnyStyle::Parser.defaults[:model] =
|
65
|
+
File.expand_path(opts[:'parser-model']).untaint
|
66
|
+
end
|
67
|
+
|
68
|
+
AnyStyle
|
49
69
|
end
|
50
70
|
|
51
71
|
|
@@ -84,6 +104,17 @@ EXAMPLES
|
|
84
104
|
Extract references from `thesis.pdf' in `no-layout' mode (e.g., use this
|
85
105
|
if your document uses a multi-column layout) and save them in BibTeX in
|
86
106
|
`./bib/thesis.bib'.
|
107
|
+
|
108
|
+
anystyle find --crop 72 thesis.pdf -
|
109
|
+
|
110
|
+
Extract references from `thesis.pdf' cropping away one inch (72pt) from
|
111
|
+
each page border and print the results to STDOUT.
|
112
|
+
|
113
|
+
anystyle find --crop 72,28 thesis.pdf -
|
114
|
+
|
115
|
+
Extract references from `thesis.pdf' cropping away one inch (72pt) from
|
116
|
+
each page's left and right border, approx. 1cm (28pt) from the top
|
117
|
+
and bottom.
|
87
118
|
}.lstrip
|
88
119
|
|
89
120
|
arg :input
|
@@ -93,6 +124,16 @@ command :find do |cmd|
|
|
93
124
|
default_value: true,
|
94
125
|
desc: 'Use layout mode for PDF text extraction'
|
95
126
|
|
127
|
+
cmd.switch 'solo',
|
128
|
+
default_value: false,
|
129
|
+
desc: 'Include references outside of reference sections'
|
130
|
+
|
131
|
+
cmd.flag ['C', 'crop'],
|
132
|
+
arg_name: 'pt',
|
133
|
+
type: Array,
|
134
|
+
must_match: /\d+(,\d+)?|\d+,\d+(,-?\d+){2}/,
|
135
|
+
desc: 'Set cropping boundary for text extraction'
|
136
|
+
|
96
137
|
cmd.action do |opts, params, args|
|
97
138
|
Commands::Find.new(opts).run(args, params)
|
98
139
|
end
|
@@ -140,6 +181,56 @@ command :parse do |cmd|
|
|
140
181
|
end
|
141
182
|
end
|
142
183
|
|
184
|
+
desc 'Check tagged documents or references'
|
185
|
+
long_desc %{
|
186
|
+
This manual page documents the AnyStyle `check' command. AnyStyle `check'
|
187
|
+
analyzes tagged text documents or references.
|
188
|
+
|
189
|
+
The input argument can be a single TTX or XML document, or a folder
|
190
|
+
containing multiple documents.
|
191
|
+
|
192
|
+
AnyStyle `check' supports the following input formats:
|
193
|
+
ttx Tagged document format, used for training the finder model;
|
194
|
+
xml References only, XML, suitable for training the parser model.
|
195
|
+
|
196
|
+
EXAMPLES
|
197
|
+
anystyle check training-data.xml
|
198
|
+
|
199
|
+
Checks all references in the XML file and prints a report to STDOUT.
|
200
|
+
}.lstrip
|
201
|
+
|
202
|
+
arg :input
|
203
|
+
command :check do |cmd|
|
204
|
+
cmd.action do |opts, params, args|
|
205
|
+
Commands::Check.new(opts).run(args, params)
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
|
210
|
+
desc 'Create a new finder or parser model'
|
211
|
+
long_desc %{
|
212
|
+
This manual page documents the AnyStyle `train' command. AnyStyle `train'
|
213
|
+
creates a new finder or parser model based on the supplied training sets.
|
214
|
+
|
215
|
+
The input argument can be a XML document, or a folder containing multiple
|
216
|
+
TTX documents.
|
217
|
+
|
218
|
+
EXAMPLES
|
219
|
+
anystyle train data.xml my-model.mod
|
220
|
+
|
221
|
+
Creates a new parser model based on the XML training set and saves it
|
222
|
+
as `my-model.mod'. To use your model use the global `--finder-model'
|
223
|
+
or `--parser-model' flags.
|
224
|
+
}.lstrip
|
225
|
+
|
226
|
+
arg :input
|
227
|
+
arg :output, :optional
|
228
|
+
command :train do |cmd|
|
229
|
+
cmd.action do |opts, params, args|
|
230
|
+
Commands::Train.new(opts).run(args, params)
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
143
234
|
desc 'Print license information'
|
144
235
|
command :license do |cmd|
|
145
236
|
cmd.action do
|
data/lib/anystyle/cli.rb
CHANGED
@@ -28,8 +28,11 @@ module AnyStyle
|
|
28
28
|
options[:format].each(&block)
|
29
29
|
end
|
30
30
|
|
31
|
-
def find(input,
|
32
|
-
AnyStyle.find(input,
|
31
|
+
def find(input, opts = {})
|
32
|
+
AnyStyle.find(input,
|
33
|
+
format: :wapiti,
|
34
|
+
layout: opts[:layout],
|
35
|
+
crop: opts[:crop].nil? ? nil : opts[:crop].map(&:to_i))
|
33
36
|
end
|
34
37
|
|
35
38
|
def parse(input)
|
@@ -67,7 +70,12 @@ module AnyStyle
|
|
67
70
|
end
|
68
71
|
|
69
72
|
def set_output_folder(path)
|
70
|
-
|
73
|
+
case path
|
74
|
+
when nil, '-'
|
75
|
+
options[:stdout] = true
|
76
|
+
else
|
77
|
+
@output_folder = Pathname.new(path).expand_path
|
78
|
+
end
|
71
79
|
ensure
|
72
80
|
unless @output_folder.nil?
|
73
81
|
if @output_folder.exist?
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module AnyStyle
|
2
|
+
module CLI
|
3
|
+
module Commands
|
4
|
+
class Check < Base
|
5
|
+
def run(args, params)
|
6
|
+
walk args[0] do |path|
|
7
|
+
print 'Checking %.25s' % "#{path.basename}....................."
|
8
|
+
start = Time.now
|
9
|
+
stats = check path
|
10
|
+
report stats, Time.now - start
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def check(path)
|
15
|
+
case path.extname
|
16
|
+
when '.ttx'
|
17
|
+
AnyStyle.finder.check path.to_s.untaint
|
18
|
+
when '.xml'
|
19
|
+
AnyStyle.parser.check path.to_s.untaint
|
20
|
+
else
|
21
|
+
raise ArgumentError, "cannot check untagged input: #{path}"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def report(stats, time)
|
26
|
+
if stats[:token][:errors] == 0
|
27
|
+
puts ' ✓ %2ds' % time
|
28
|
+
else
|
29
|
+
puts '%4d seq %6.2f%% %6d tok %5.2f%% %2ds' % [
|
30
|
+
stats[:sequence][:errors],
|
31
|
+
stats[:sequence][:rate],
|
32
|
+
stats[:token][:errors],
|
33
|
+
stats[:token][:rate],
|
34
|
+
time
|
35
|
+
]
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -6,8 +6,8 @@ module AnyStyle
|
|
6
6
|
set_output_folder args[1]
|
7
7
|
walk args[0] do |path, base_path|
|
8
8
|
say "Analyzing #{path.relative_path_from(base_path)} ..."
|
9
|
-
doc = find(path.to_s.untaint,
|
10
|
-
ref = doc[0].references
|
9
|
+
doc = find(path.to_s.untaint, params)
|
10
|
+
ref = doc[0].references(normalize_blocks: !params[:solo])
|
11
11
|
|
12
12
|
if ref.length == 0
|
13
13
|
say "no references found."
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module AnyStyle
|
2
|
+
module CLI
|
3
|
+
module Commands
|
4
|
+
class Train < Base
|
5
|
+
def run(args, params)
|
6
|
+
check_no_overwrite! args[1]
|
7
|
+
|
8
|
+
Wapiti.debug!
|
9
|
+
model = train(args[0])
|
10
|
+
|
11
|
+
if args[1].nil?
|
12
|
+
model.save
|
13
|
+
else
|
14
|
+
model.save File.expand_path(args[1]).untaint
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def train(path)
|
19
|
+
case
|
20
|
+
when File.extname(path) == '.xml'
|
21
|
+
AnyStyle.parser.train path.to_s.untaint
|
22
|
+
AnyStyle.parser.model
|
23
|
+
when File.directory?(path)
|
24
|
+
AnyStyle.finder.train path.to_s.untaint
|
25
|
+
AnyStyle.finder.model
|
26
|
+
else
|
27
|
+
raise ArgumentError, "cannot train input: #{path}"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def check_no_overwrite!(path)
|
32
|
+
if !overwrite? && (path.nil? || File.exist?(path))
|
33
|
+
raise RuntimeError,
|
34
|
+
"file exists, use --overwrite to force saving: #{path}"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
data/lib/anystyle/cli/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anystyle-cli
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: anystyle
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.2'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.2'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bibtex-ruby
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -65,8 +65,10 @@ files:
|
|
65
65
|
- bin/anystyle
|
66
66
|
- lib/anystyle/cli.rb
|
67
67
|
- lib/anystyle/cli/commands/base.rb
|
68
|
+
- lib/anystyle/cli/commands/check.rb
|
68
69
|
- lib/anystyle/cli/commands/find.rb
|
69
70
|
- lib/anystyle/cli/commands/parse.rb
|
71
|
+
- lib/anystyle/cli/commands/train.rb
|
70
72
|
- lib/anystyle/cli/version.rb
|
71
73
|
homepage: http://anystyle.io
|
72
74
|
licenses:
|
@@ -88,7 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
88
90
|
version: '0'
|
89
91
|
requirements: []
|
90
92
|
rubyforge_project:
|
91
|
-
rubygems_version: 2.
|
93
|
+
rubygems_version: 2.6.13
|
92
94
|
signing_key:
|
93
95
|
specification_version: 4
|
94
96
|
summary: AnyStyle CLI
|