anystyle-cli 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: '082b162dc4771fef9ba5ec4ee4fa623931ba0e34c8617a5b47ca7545bb14ce58'
4
- data.tar.gz: d035793b06dc909b570defd917995a782fe5cee3eafc9674aa3e696b64e73e98
2
+ SHA1:
3
+ metadata.gz: c6c4e6580d9495afd965e3401f030cee94925747
4
+ data.tar.gz: 56e3842f3b3db05ea1a7a5b17ab4f72b5ff07773
5
5
  SHA512:
6
- metadata.gz: a2836a714f4071917502ee641733d491c33710fa9b0e07f113ebf1c07c3bf2f2a97f9d5621312ca2eb942768a5b481597e03573ffa27cdc31ecba5d02351a7eb
7
- data.tar.gz: d51da26bceb9b7f2dd56ae1322145035c8669ad52ab5e0ac7bb11a6f9972ad3f2a757cfbb3e61272855d5605be4b3de40a44c1c2a644e1bea744e9bcd0fb319f
6
+ metadata.gz: 01b90daac809b52ac69e70550952596512ef8df0bdc5503faabf4a173e83d4bf045dd2b0fcf4ae0bb28b165b182885e6f997f5f6f764638ed7b19dda67a1a87d
7
+ data.tar.gz: be29161a8a6ba925f2f21a4d50ad1cb41c93d945977cc3cdf026b58383e3c1d0f979c44966cb32934c1ce2d24d0bbba7751e8b9f771927c9c43d2eb6b8385546
data/README.md CHANGED
@@ -10,22 +10,26 @@ anystyle --help
10
10
  anystyle [global options] command [command options] [arguments...]
11
11
 
12
12
  VERSION
13
- 1.0.0 (cli 1.0.0, data 1.2.0)
13
+ 1.1.0 (cli 1.0.2, data 1.2.0)
14
14
 
15
15
  GLOBAL OPTIONS
16
- --adapter=name - Set the dictionary adapter (default: ruby)
17
- -f, --format=name - Set the output format (default: ["json"])
18
- --help - Show this message
19
- --[no-]stdout - Print results directly to stdout
20
- --[no-]verbose - Print status messages to stderr
21
- --version - Display the program version
22
- -w, --[no-]overwrite - Allow overwriting existing files
16
+ -F, --finder-model=file - Set the finder model file (default: none)
17
+ -P, --parser-model=file - Set the parser model file (default: none)
18
+ --adapter=name - Set the dictionary adapter (default: ruby)
19
+ -f, --format=name - Set the output format (default: ["json"])
20
+ --help - Show this message
21
+ --[no-]stdout - Print results directly to stdout
22
+ --[no-]verbose - Print status messages to stderr
23
+ --version - Display the program version
24
+ -w, --[no-]overwrite - Allow overwriting existing files
23
25
 
24
26
  COMMANDS
27
+ check - Check tagged documents or references
25
28
  find - Find and extract references from text documents
26
29
  help - Shows a list of commands or help for one command
27
30
  license - Print license information
28
31
  parse - Parse and convert references
32
+ train - Create a new finder or parser model
29
33
 
30
34
  anystyle help find
31
35
  ------------------
@@ -57,7 +61,6 @@ anystyle help find
57
61
 
58
62
  Analyzing PDF documents currently depends on `pdftotext' which must be
59
63
  installed separately.
60
-
61
64
  EXAMPLES
62
65
  anystyle -f csl,xml find thesis.pdf
63
66
 
@@ -70,6 +73,24 @@ anystyle help find
70
73
  if your document uses a multi-column layout) and save them in BibTeX in
71
74
  `./bib/thesis.bib'.
72
75
 
76
+ anystyle find --crop 72 thesis.pdf -
77
+
78
+ Extract references from `thesis.pdf' cropping away one inch (72pt) from
79
+ each page border and print the results to STDOUT.
80
+
81
+ anystyle find --crop 72,28 thesis.pdf -
82
+
83
+ Extract references from `thesis.pdf' cropping away one inch (72pt) from
84
+ each page's left and right border, approx. 1cm (28pt) from the top
85
+ and bottom.
86
+
87
+
88
+ COMMAND OPTIONS
89
+ -C, --crop=pt - Set cropping boundary for text extraction (default: none)
90
+ --[no-]layout - Use layout mode for PDF text extraction (default: enabled)
91
+ --[no-]solo - Include references outside of reference sections
92
+
93
+
73
94
  anystyle help parse
74
95
  -------------------
75
96
  COMMAND OPTIONS
@@ -111,6 +132,55 @@ anystyle help parse
111
132
 
112
133
  Extract references from `input.txt' and print them to STDOUT in CSL/JSON.
113
134
 
135
+
136
+ anystyle help check
137
+ -------------------
138
+ NAME
139
+ check - Check tagged documents or references
140
+
141
+ SYNOPSIS
142
+ anystyle [global options] check input
143
+
144
+ DESCRIPTION
145
+ This manual page documents the AnyStyle `check' command. AnyStyle `check'
146
+ analyzes tagged text documents or references.
147
+
148
+ The input argument can be a single TTX or XML document, or a folder
149
+ containing multiple documents.
150
+
151
+ AnyStyle `check' supports the following input formats:
152
+ ttx Tagged document format, used for training the finder model;
153
+ xml References only, XML, suitable for training the parser model.
154
+
155
+ EXAMPLES
156
+ anystyle check training-data.xml
157
+
158
+ Checks all references in the XML file and prints a report to STDOUT.
159
+
160
+
161
+ anystyle help train
162
+ -------------------
163
+ NAME
164
+ train - Create a new finder or parser model
165
+
166
+ SYNOPSIS
167
+ anystyle [global options] train input [output]
168
+
169
+ DESCRIPTION
170
+ This manual page documents the AnyStyle `train' command. AnyStyle `train'
171
+ creates a new finder or parser model based on the supplied training sets.
172
+
173
+ The input argument can be an XML document, or a folder containing multiple
174
+ TTX documents.
175
+
176
+ EXAMPLES
177
+ anystyle train data.xml my-model.mod
178
+
179
+ Creates a new parser model based on the XML training set and saves it
180
+ as `my-model.mod'. To use your model use the global `--finder-model'
181
+ or `--parser-model' flags.
182
+
183
+
114
184
  License
115
185
  -------
116
186
  Copyright 2011-2018 Sylvester Keil. All rights reserved.
data/bin/anystyle CHANGED
@@ -30,6 +30,14 @@ switch ['w', 'overwrite'],
30
30
  switch 'stdout',
31
31
  desc: 'Print results directly to stdout'
32
32
 
33
+ flag ['F', 'finder-model'],
34
+ arg_name: 'file',
35
+ desc: 'Set the finder model file'
36
+
37
+ flag ['P', 'parser-model'],
38
+ arg_name: 'file',
39
+ desc: 'Set the parser model file'
40
+
33
41
  flag 'adapter',
34
42
  default_value: 'ruby',
35
43
  arg_name: 'name',
@@ -46,6 +54,18 @@ flag ['f', 'format'],
46
54
 
47
55
  pre do |opts|
48
56
  AnyStyle::Dictionary.defaults[:adapter] = opts[:adapter]
57
+
58
+ unless opts[:'finder-model'].nil?
59
+ AnyStyle::Finder.defaults[:model] =
60
+ File.expand_path(opts[:'finder-model']).untaint
61
+ end
62
+
63
+ unless opts[:'parser-model'].nil?
64
+ AnyStyle::Parser.defaults[:model] =
65
+ File.expand_path(opts[:'parser-model']).untaint
66
+ end
67
+
68
+ AnyStyle
49
69
  end
50
70
 
51
71
 
@@ -84,6 +104,17 @@ EXAMPLES
84
104
  Extract references from `thesis.pdf' in `no-layout' mode (e.g., use this
85
105
  if your document uses a multi-column layout) and save them in BibTeX in
86
106
  `./bib/thesis.bib'.
107
+
108
+ anystyle find --crop 72 thesis.pdf -
109
+
110
+ Extract references from `thesis.pdf' cropping away one inch (72pt) from
111
+ each page border and print the results to STDOUT.
112
+
113
+ anystyle find --crop 72,28 thesis.pdf -
114
+
115
+ Extract references from `thesis.pdf' cropping away one inch (72pt) from
116
+ each page's left and right border, approx. 1cm (28pt) from the top
117
+ and bottom.
87
118
  }.lstrip
88
119
 
89
120
  arg :input
@@ -93,6 +124,16 @@ command :find do |cmd|
93
124
  default_value: true,
94
125
  desc: 'Use layout mode for PDF text extraction'
95
126
 
127
+ cmd.switch 'solo',
128
+ default_value: false,
129
+ desc: 'Include references outside of reference sections'
130
+
131
+ cmd.flag ['C', 'crop'],
132
+ arg_name: 'pt',
133
+ type: Array,
134
+ must_match: /\d+(,\d+)?|\d+,\d+(,-?\d+){2}/,
135
+ desc: 'Set cropping boundary for text extraction'
136
+
96
137
  cmd.action do |opts, params, args|
97
138
  Commands::Find.new(opts).run(args, params)
98
139
  end
@@ -140,6 +181,56 @@ command :parse do |cmd|
140
181
  end
141
182
  end
142
183
 
184
+ desc 'Check tagged documents or references'
185
+ long_desc %{
186
+ This manual page documents the AnyStyle `check' command. AnyStyle `check'
187
+ analyzes tagged text documents or references.
188
+
189
+ The input argument can be a single TTX or XML document, or a folder
190
+ containing multiple documents.
191
+
192
+ AnyStyle `check' supports the following input formats:
193
+ ttx Tagged document format, used for training the finder model;
194
+ xml References only, XML, suitable for training the parser model.
195
+
196
+ EXAMPLES
197
+ anystyle check training-data.xml
198
+
199
+ Checks all references in the XML file and prints a report to STDOUT.
200
+ }.lstrip
201
+
202
+ arg :input
203
+ command :check do |cmd|
204
+ cmd.action do |opts, params, args|
205
+ Commands::Check.new(opts).run(args, params)
206
+ end
207
+ end
208
+
209
+
210
+ desc 'Create a new finder or parser model'
211
+ long_desc %{
212
+ This manual page documents the AnyStyle `train' command. AnyStyle `train'
213
+ creates a new finder or parser model based on the supplied training sets.
214
+
215
+ The input argument can be an XML document, or a folder containing multiple
216
+ TTX documents.
217
+
218
+ EXAMPLES
219
+ anystyle train data.xml my-model.mod
220
+
221
+ Creates a new parser model based on the XML training set and saves it
222
+ as `my-model.mod'. To use your model use the global `--finder-model'
223
+ or `--parser-model' flags.
224
+ }.lstrip
225
+
226
+ arg :input
227
+ arg :output, :optional
228
+ command :train do |cmd|
229
+ cmd.action do |opts, params, args|
230
+ Commands::Train.new(opts).run(args, params)
231
+ end
232
+ end
233
+
143
234
  desc 'Print license information'
144
235
  command :license do |cmd|
145
236
  cmd.action do
data/lib/anystyle/cli.rb CHANGED
@@ -4,5 +4,7 @@ require 'anystyle'
4
4
 
5
5
  require 'anystyle/cli/version'
6
6
  require 'anystyle/cli/commands/base'
7
+ require 'anystyle/cli/commands/check'
7
8
  require 'anystyle/cli/commands/find'
8
9
  require 'anystyle/cli/commands/parse'
10
+ require 'anystyle/cli/commands/train'
@@ -28,8 +28,11 @@ module AnyStyle
28
28
  options[:format].each(&block)
29
29
  end
30
30
 
31
- def find(input, **opts)
32
- AnyStyle.find(input, format: :wapiti, **opts)
31
+ def find(input, opts = {})
32
+ AnyStyle.find(input,
33
+ format: :wapiti,
34
+ layout: opts[:layout],
35
+ crop: opts[:crop].nil? ? nil : opts[:crop].map(&:to_i))
33
36
  end
34
37
 
35
38
  def parse(input)
@@ -67,7 +70,12 @@ module AnyStyle
67
70
  end
68
71
 
69
72
  def set_output_folder(path)
70
- @output_folder = Pathname.new(path).expand_path unless path.nil?
73
+ case path
74
+ when nil, '-'
75
+ options[:stdout] = true
76
+ else
77
+ @output_folder = Pathname.new(path).expand_path
78
+ end
71
79
  ensure
72
80
  unless @output_folder.nil?
73
81
  if @output_folder.exist?
@@ -0,0 +1,41 @@
1
+ module AnyStyle
2
+ module CLI
3
+ module Commands
4
+ class Check < Base
5
+ def run(args, params)
6
+ walk args[0] do |path|
7
+ print 'Checking %.25s' % "#{path.basename}....................."
8
+ start = Time.now
9
+ stats = check path
10
+ report stats, Time.now - start
11
+ end
12
+ end
13
+
14
+ def check(path)
15
+ case path.extname
16
+ when '.ttx'
17
+ AnyStyle.finder.check path.to_s.untaint
18
+ when '.xml'
19
+ AnyStyle.parser.check path.to_s.untaint
20
+ else
21
+ raise ArgumentError, "cannot check untagged input: #{path}"
22
+ end
23
+ end
24
+
25
+ def report(stats, time)
26
+ if stats[:token][:errors] == 0
27
+ puts ' ✓ %2ds' % time
28
+ else
29
+ puts '%4d seq %6.2f%% %6d tok %5.2f%% %2ds' % [
30
+ stats[:sequence][:errors],
31
+ stats[:sequence][:rate],
32
+ stats[:token][:errors],
33
+ stats[:token][:rate],
34
+ time
35
+ ]
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -6,8 +6,8 @@ module AnyStyle
6
6
  set_output_folder args[1]
7
7
  walk args[0] do |path, base_path|
8
8
  say "Analyzing #{path.relative_path_from(base_path)} ..."
9
- doc = find(path.to_s.untaint, layout: params[:layout])
10
- ref = doc[0].references
9
+ doc = find(path.to_s.untaint, params)
10
+ ref = doc[0].references(normalize_blocks: !params[:solo])
11
11
 
12
12
  if ref.length == 0
13
13
  say "no references found."
@@ -0,0 +1,41 @@
1
+ module AnyStyle
2
+ module CLI
3
+ module Commands
4
+ class Train < Base
5
+ def run(args, params)
6
+ check_no_overwrite! args[1]
7
+
8
+ Wapiti.debug!
9
+ model = train(args[0])
10
+
11
+ if args[1].nil?
12
+ model.save
13
+ else
14
+ model.save File.expand_path(args[1]).untaint
15
+ end
16
+ end
17
+
18
+ def train(path)
19
+ case
20
+ when File.extname(path) == '.xml'
21
+ AnyStyle.parser.train path.to_s.untaint
22
+ AnyStyle.parser.model
23
+ when File.directory?(path)
24
+ AnyStyle.finder.train path.to_s.untaint
25
+ AnyStyle.finder.model
26
+ else
27
+ raise ArgumentError, "cannot train input: #{path}"
28
+ end
29
+ end
30
+
31
+ def check_no_overwrite!(path)
32
+ if !overwrite? && (path.nil? || File.exist?(path))
33
+ raise RuntimeError,
34
+ "file exists, use --overwrite to force saving: #{path}"
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+
@@ -1,5 +1,5 @@
1
1
  module AnyStyle
2
2
  module CLI
3
- VERSION = '1.0.2'.freeze
3
+ VERSION = '1.1.0'.freeze
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-cli
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-11 00:00:00.000000000 Z
11
+ date: 2018-08-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: anystyle
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.0'
19
+ version: '1.2'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.0'
26
+ version: '1.2'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: bibtex-ruby
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -65,8 +65,10 @@ files:
65
65
  - bin/anystyle
66
66
  - lib/anystyle/cli.rb
67
67
  - lib/anystyle/cli/commands/base.rb
68
+ - lib/anystyle/cli/commands/check.rb
68
69
  - lib/anystyle/cli/commands/find.rb
69
70
  - lib/anystyle/cli/commands/parse.rb
71
+ - lib/anystyle/cli/commands/train.rb
70
72
  - lib/anystyle/cli/version.rb
71
73
  homepage: http://anystyle.io
72
74
  licenses:
@@ -88,7 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
90
  version: '0'
89
91
  requirements: []
90
92
  rubyforge_project:
91
- rubygems_version: 2.7.4
93
+ rubygems_version: 2.6.13
92
94
  signing_key:
93
95
  specification_version: 4
94
96
  summary: AnyStyle CLI