anystyle-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c3c832096f90a3cc1f233ecdd4f500c2bb63460cc733142f764616cd33fc95f9
4
+ data.tar.gz: 11d151afafd488b955ba8f51863e60497007daf11fa192ec608769b9a670ac6e
5
+ SHA512:
6
+ metadata.gz: 783c9e5ec5dcf5d456fda3d1a0db502da61670e525dff9927f0af3f1f3c120f8929fb47006173a0c5b2ce70925043811a4ff65d42913b68f69d3af7ad87a4425
7
+ data.tar.gz: a1a737c36024f02252458a16c25f212594d658b5b825876b8c358c73e9b2b72c8c1010261c732a6fe1382bb2021787576d0552e5291386912a62eaaca0370774
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2018, Sylvester Keil
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ * Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ * Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,119 @@
1
+ AnyStyle Command Line Interface
2
+ ===============================
3
+
4
+ anystyle --help
5
+ ---------------
6
+ NAME
7
+ anystyle - Finds and parses bibliographic references
8
+
9
+ SYNOPSIS
10
+ anystyle [global options] command [command options] [arguments...]
11
+
12
+ VERSION
13
+ 1.0.0 (cli 1.0.0, data 1.2.0)
14
+
15
+ GLOBAL OPTIONS
16
+ --adapter=name - Set the dictionary adapter (default: ruby)
17
+ -f, --format=name - Set the output format (default: ["json"])
18
+ --help - Show this message
19
+ --[no-]stdout - Print results directly to stdout
20
+ --[no-]verbose - Print status messages to stderr
21
+ --version - Display the program version
22
+ -w, --[no-]overwrite - Allow overwriting existing files
23
+
24
+ COMMANDS
25
+ find - Find and extract references from text documents
26
+ help - Shows a list of commands or help for one command
27
+ license - Print license information
28
+ parse - Parse and convert references
29
+
30
+ anystyle help find
31
+ ------------------
32
+ NAME
33
+ find - Find and extract references from text documents
34
+
35
+ SYNOPSIS
36
+ anystyle [global options] find [command options] input [output]
37
+
38
+ DESCRIPTION
39
+ This manual page documents the AnyStyle `find' command. AnyStyle `find'
40
+ analyzes PDF or text documents and extracts all references it finds.
41
+
42
+ The input argument can be a single PDF or text document, or a folder
43
+ containing multiple documents. The (optional) output argument specifies
44
+ the folder where the results shall be saved; if no output folder is
45
+ specified, results will be saved in the folder containing the input.
46
+
47
+ AnyStyle `find' supports the following formats:
48
+ bib BibTeX (references only);
49
+ csl CSL/JSON (references only);
50
+ json AnyStyle JSON (references only);
51
+ ref One reference per line, suitable for parser input;
52
+ txt Plain text document;
53
+ ttx Tagged document format, used for training the finder model;
54
+ xml References only, XML, suitable for training the parser model.
55
+
56
+ You can specify multiple output formats, separated by commas.
57
+
58
+ Analyzing PDF documents currently depends on `pdftotext' which must be
59
+ installed separately.
60
+
61
+ EXAMPLES
62
+ anystyle -f csl,xml find thesis.pdf
63
+
64
+ Extract references from `thesis.pdf' and save them in `thesis.csl' and
65
+ `thesis.xml'.
66
+
67
+ anystyle -f bib find --no-layout thesis.pdf bib
68
+
69
+ Extract references from `thesis.pdf' in `no-layout' mode (e.g., use this
70
+ if your document uses a multi-column layout) and save them in BibTeX in
71
+ `./bib/thesis.bib'.
72
+
73
+ anystyle help parse
74
+ -------------------
75
+ COMMAND OPTIONS
76
+ --[no-]layout - Use layout mode for PDF text extraction (default: enabled)
77
+ NAME
78
+ parse - Parse and convert references
79
+
80
+ SYNOPSIS
81
+ anystyle [global options] parse input [output]
82
+
83
+ DESCRIPTION
84
+ This manual page documents the AnyStyle `parse' command. AnyStyle `parse'
85
+ segments references (one per line) and converts them into structured
86
+ formats.
87
+
88
+ The input argument can be a single text document containing one full
89
+ reference per line (blank lines will be ignored), or a folder containing
90
+ multiple documents. The (optional) output argument specifies
91
+ the folder where the results shall be saved; if no output folder is
92
+ specified, results will be saved in the folder containing the input.
93
+
94
+ AnyStyle `parse' supports the following formats:
95
+ bib BibTeX (normalized);
96
+ csl CSL/JSON (normalized);
97
+ json AnyStyle JSON (normalized);
98
+ ref One reference per line, suitable for parser input;
99
+ txt Same as `ref';
100
+ xml XML, suitable for training the parser model.
101
+
102
+ You can specify multiple output formats, separated by commas.
103
+
104
+ EXAMPLES
105
+ anystyle -f json,xml parse biblio.txt
106
+
107
+ Extract references from `biblio.txt' and save them in `biblio.json' and
108
+ `biblio.xml'.
109
+
110
+ anystyle --stdout -f csl parse input.txt
111
+
112
+ Extract references from `input.txt' and print them to STDOUT in CSL/JSON.
113
+
114
+ License
115
+ -------
116
+ Copyright 2011-2018 Sylvester Keil. All rights reserved.
117
+
118
+ AnyStyle is distributed under a BSD-style license.
119
+ See LICENSE for details.
data/bin/anystyle ADDED
@@ -0,0 +1,182 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'gli'
4
+ require 'anystyle/cli'
5
+
6
# Pull the GLI DSL and the AnyStyle::CLI namespace into the top-level scope
# so that the DSL calls below and the bare `VERSION` / `Commands` constants
# resolve.
include GLI::App
include AnyStyle::CLI

program_desc 'Finds and parses bibliographic references'

# Combined version banner: library version, CLI version, data version.
version Kernel.format(
  '%s (cli %s, data %s)',
  AnyStyle::VERSION, VERSION, AnyStyle::Data::VERSION
)

subcommand_option_handling :normal
arguments :strict

wrap_help_text :verbatim

# Array-typed flags are given as a single comma-separated string.
accept(Array) { |list| list.split(',') }

#config_file '.anystyle'

# --- Global switches -------------------------------------------------------

switch 'verbose', desc: 'Print status messages to stderr'

switch %w[w overwrite], desc: 'Allow overwriting existing files'

switch 'stdout', desc: 'Print results directly to stdout'

# --- Global flags ----------------------------------------------------------

flag 'adapter',
  desc: 'Set the dictionary adapter',
  arg_name: 'name',
  default_value: 'ruby',
  must_match: ['ruby', 'memory', 'gdbm']

flag %w[f format],
  desc: 'Set the output format',
  arg_name: 'name',
  type: Array,
  default_value: ['json'],
  must_match: /(bib|csl|json|ref|ttx|txt|xml)(,(bib|csl|json|ref|ttx|txt|xml))*/

# Hand the selected dictionary adapter to AnyStyle. The block's value is the
# assigned adapter name, exactly as before.
pre { |opts| AnyStyle::Dictionary.defaults[:adapter] = opts[:adapter] }
50
+
51
+
52
# `find` command: registers the help text, the positional arguments and the
# action that delegates to Commands::Find.
#
# NOTE: the long description is printed verbatim (see `wrap_help_text`), so
# its text is kept exactly as-is apart from the spelling fix
# ("Anlyzing" -> "Analyzing").
desc 'Find and extract references from text documents'
long_desc %{
This manual page documents the AnyStyle `find' command. AnyStyle `find'
analyzes PDF or text documents and extracts all references it finds.

The input argument can be a single PDF or text document, or a folder
containing multiple documents. The (optional) output argument specifies
the folder where the results shall be saved; if no output folder is
specified, results will be saved in the folder containing the input.

AnyStyle `find' supports the following formats:
bib BibTeX (references only);
csl CSL/JSON (references only);
json AnyStyle JSON (references only);
ref One reference per line, suitable for parser input;
txt Plain text document;
ttx Tagged document format, used for training the finder model;
xml References only, XML, suitable for training the parser model.

You can specify multiple output formats, separated by commas.

Analyzing PDF documents currently depends on `pdftotext' which must be
installed separately.

EXAMPLES
anystyle -f csl,xml find thesis.pdf

Extract references from `thesis.pdf' and save them in `thesis.csl' and
`thesis.xml'.

anystyle -f bib find --no-layout thesis.pdf bib

Extract references from `thesis.pdf' in `no-layout' mode (e.g., use this
if your document uses a multi-column layout) and save them in BibTeX in
`./bib/thesis.bib'.
}.lstrip

arg :input
arg :output, :optional
command :find do |cmd|
  # Command-specific switch; enabled unless --no-layout is passed.
  cmd.switch 'layout',
    default_value: true,
    desc: 'Use layout mode for PDF text extraction'

  # opts: global options, params: command options, args: positional arguments.
  cmd.action do |opts, params, args|
    Commands::Find.new(opts).run(args, params)
  end
end
100
+
101
+
102
# `parse` command: help text, positional arguments and the action that
# delegates to Commands::Parse. The long description is reproduced unchanged.
desc 'Parse and convert references'
long_desc %{
This manual page documents the AnyStyle `parse' command. AnyStyle `parse'
segments references (one per line) and converts them into structured
formats.

The input argument can be a single text document containing one full
reference per line (blank lines will be ignored), or a folder containing
multiple documents. The (optional) output argument specifies
the folder where the results shall be saved; if no output folder is
specified, results will be saved in the folder containing the input.

AnyStyle `parse' supports the following formats:
bib BibTeX (normalized);
csl CSL/JSON (normalized);
json AnyStyle JSON (normalized);
ref One reference per line, suitable for parser input;
txt Same as `ref';
xml XML, suitable for training the parser model.

You can specify multiple output formats, separated by commas.

EXAMPLES
anystyle -f json,xml parse biblio.txt

Extract references from `biblio.txt' and save them in `biblio.json' and
`biblio.xml'.

anystyle --stdout -f csl parse input.txt

Extract references from `input.txt' and print them to STDOUT in CSL/JSON.
}.lstrip

arg :input
arg :output, :optional
command(:parse) do |parse_cmd|
  # global_opts: global options, cmd_opts: command options,
  # positional: the input/output arguments.
  parse_cmd.action do |global_opts, cmd_opts, positional|
    runner = Commands::Parse.new(global_opts)
    runner.run(positional, cmd_opts)
  end
end
142
+
143
# `license` command: prints the AnyStyle and Wapiti copyright notices
# followed by the license terms. The AnyStyle copyright range always ends
# with the current year.
desc 'Print license information'
command(:license) do |license_cmd|
  license_cmd.action do
    current_year = Time.now.year

    puts 'AnyStyle.'
    puts Kernel.format('Copyright (C) 2011-%d Sylvester Keil.', current_year)
    puts <<~EOL

      Wapiti.
      Copyright (C) 2009-2013 CNRS.

      All rights reserved.

      Redistribution and use in source and binary forms, with or without
      modification, are permitted provided that the following conditions are met:

      * Redistributions of source code must retain the above copyright notice, this
      list of conditions and the following disclaimer.

      * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

      THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
      IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
      MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
      EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
      INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
      BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
      OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
      NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
      EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    EOL
  end
end

# Run the application and use its result as the process exit status.
exit(run(ARGV))
181
+
182
+ # vim: syntax=ruby
@@ -0,0 +1,8 @@
1
+ require 'json'
2
+ require 'pathname'
3
+ require 'anystyle'
4
+
5
+ require 'anystyle/cli/version'
6
+ require 'anystyle/cli/commands/base'
7
+ require 'anystyle/cli/commands/find'
8
+ require 'anystyle/cli/commands/parse'
@@ -0,0 +1,114 @@
1
module AnyStyle
  module CLI
    module Commands
      # Shared plumbing for the CLI commands: option predicates, thin
      # wrappers around AnyStyle.find / AnyStyle.parse, output formatting,
      # and path / file handling. Subclasses implement #run.
      class Base
        attr_reader :options, :output_folder

        # options - Hash of global options. This class reads the keys
        #           :verbose, :stdout, :overwrite and :format.
        def initialize(options)
          @options = options
        end

        # Entry point of a command; subclasses must override it.
        #
        # Raises NotImplementedError when invoked on a class that does not
        # provide its own implementation.
        def run(args, params)
          raise NotImplementedError,
            "#{self.class}#run must be implemented by subclasses"
        end

        # True when status messages should be printed (see #say).
        def verbose?
          !!options[:verbose]
        end

        # True when results go to STDOUT instead of files (see #write).
        def stdout?
          !!options[:stdout]
        end

        # True when existing output files may be replaced (see #write).
        def overwrite?
          !!options[:overwrite]
        end

        # Iterates over the requested output formats (options[:format]).
        def each_format(&block)
          options[:format].each(&block)
        end

        # Runs the AnyStyle finder on input, requesting the :wapiti format;
        # any extra keyword options are passed through.
        def find(input, **opts)
          AnyStyle.find(input, format: :wapiti, **opts)
        end

        # Runs the AnyStyle parser on input, requesting the :wapiti format.
        def parse(input)
          AnyStyle.parse(input, format: :wapiti)
        end

        # Converts a parsed dataset into the String representation for fmt.
        #
        # fmt - one of 'bib', 'csl', 'json', 'ref', 'txt' or 'xml'.
        #
        # Raises ArgumentError for any other format name.
        def format(dataset, fmt)
          case fmt
          when 'bib'
            AnyStyle.parser.format_bibtex(dataset).to_s
          when 'csl'
            JSON.pretty_generate AnyStyle.parser.format_csl(dataset)
          when 'json'
            JSON.pretty_generate AnyStyle.parser.format_hash(dataset)
          when 'ref', 'txt'
            dataset.to_txt
          when 'xml'
            dataset.to_xml(indent: 2).to_s
          else
            raise ArgumentError, "format not supported: #{fmt}"
          end
        end

        # Returns path (a Pathname) with its extension replaced by
        # new_extname, e.g. thesis.pdf + '.bib' -> thesis.bib.
        def extsub(path, new_extname)
          basename = path.basename(path.extname)
          path.dirname.join("#{basename}#{new_extname}")
        end

        # Maps path into the output folder, keeping its position relative
        # to base_path. Returns path unchanged when no output folder is set.
        def transpose(path, base_path)
          if output_folder.nil?
            path
          else
            output_folder.join(path.relative_path_from(base_path))
          end
        end

        # Remembers the (expanded) output folder and makes sure it exists.
        # A nil path leaves the current setting untouched.
        #
        # Missing folders are created including any missing parents, so
        # nested output paths such as `out/refs` work.
        #
        # Returns the output folder Pathname, or nil when none is set.
        # Raises ArgumentError if the path exists but is not a directory.
        def set_output_folder(path)
          @output_folder = Pathname.new(path).expand_path unless path.nil?
          return nil if @output_folder.nil?

          if @output_folder.exist?
            unless @output_folder.directory?
              raise ArgumentError, "not a directory: #{path}"
            end
          else
            @output_folder.mkpath
          end

          @output_folder
        end

        # Prints a status message to STDERR, but only in verbose mode.
        def say(*args)
          STDERR.print(*args) if verbose?
        end

        # Yields every input file together with its base directory.
        #
        # For a directory, each direct child that is not itself a directory
        # is yielded with the directory as base (no recursion). For a single
        # file, the file is yielded with its parent directory as base.
        #
        # Raises ArgumentError if input does not exist.
        def walk(input)
          path = Pathname(input).expand_path
          raise ArgumentError, "path does not exist: #{input}" unless path.exist?

          if path.directory?
            path.each_child do |file|
              yield file, path unless file.directory?
            end
          else
            yield path, path.dirname
          end
        end

        # Emits content: to STDOUT in stdout mode, otherwise to path after
        # mapping it into the output folder (see #transpose).
        #
        # Raises RuntimeError if the target file exists and overwriting has
        # not been enabled.
        def write(content, path, base_path)
          if stdout?
            STDOUT.puts(content)
          else
            path = transpose(path, base_path)
            if !overwrite? && path.exist?
              raise RuntimeError,
                "file exists, use --overwrite to force saving: #{path}"
            end
            File.write path, content
          end
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module AnyStyle
  module CLI
    module Commands
      # Implements the `find` command: runs the finder on every input
      # document and writes the result once per requested output format.
      class Find < Base
        # args   - positional arguments: args[0] is the input file or
        #          folder, args[1] the optional output folder.
        # params - command options; params[:layout] is passed on to the
        #          finder.
        def run(args, params)
          set_output_folder args[1]
          walk args[0] do |path, base_path|
            say "Analyzing #{path.relative_path_from(base_path)} ..."
            doc = find(input_string(path), layout: params[:layout])
            ref = doc[0].references

            if ref.length == 0
              say "no references found.\n"
            else
              say "#{ref.length} references found.\n"
              # Parsed references, computed lazily and shared by all formats
              # that need the parser ('bib', 'csl', 'json', 'xml').
              dst = nil
              each_format do |fmt|
                case fmt
                when 'ttx'
                  say "Formatting document as #{fmt} ...\n"
                  res = doc.to_txt tagged: true
                when 'txt'
                  say "Formatting document as #{fmt} ...\n"
                  res = doc.to_txt tagged: false
                when 'ref'
                  say "Formatting references as #{fmt} ...\n"
                  res = ref.join("\n")
                else
                  say "Formatting references as #{fmt} ...\n"
                  dst ||= parse(ref.join("\n"))
                  res = format(dst, fmt)
                end

                out = extsub(path, ".#{fmt}")
                say "Writing #{out.relative_path_from(base_path)} ...\n"
                write res, out, base_path
              end
            end
          end
        end

        private

        # Returns path as a String, untainted where the running Ruby still
        # supports it. Object#untaint was removed in Ruby 3.2, so calling it
        # unconditionally raises NoMethodError there; on older Rubies the
        # previous behaviour is kept exactly.
        def input_string(path)
          str = path.to_s
          str.untaint if str.respond_to?(:untaint)
          str
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module AnyStyle
  module CLI
    module Commands
      # Implements the `parse` command: parses every input document and
      # writes the result once per requested output format.
      class Parse < Base
        # args   - positional arguments: args[0] is the input file or
        #          folder, args[1] the optional output folder.
        # params - command options (not used by this command).
        def run(args, params)
          set_output_folder args[1]
          walk args[0] do |path, base_path|
            say "Parsing #{path.relative_path_from(base_path)} ..."

            # Object#untaint was removed in Ruby 3.2; only call it where it
            # still exists so older Rubies behave exactly as before and
            # newer ones no longer fail with NoMethodError.
            input = path.to_s
            input.untaint if input.respond_to?(:untaint)

            dataset = parse(input)
            say "#{dataset.length} references found.\n"
            each_format do |fmt|
              say "Formatting references as #{fmt} ...\n"
              res = format(dataset, fmt)
              out = extsub(path, ".#{fmt}")
              say "Writing #{out.relative_path_from(base_path)} ...\n"
              write res, out, base_path
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module AnyStyle
  module CLI
    # Version string of the command line interface; frozen so it cannot be
    # modified at runtime.
    VERSION = '1.0.0'.freeze
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: anystyle-cli
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Sylvester Keil
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-06-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: anystyle
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bibtex-ruby
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '4.4'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '4.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: gli
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.17'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.17'
55
+ description: A command line interface to the AnyStyle Parser and Finder.
56
+ email:
57
+ - http://sylvester.keil.or.at
58
+ executables:
59
+ - anystyle
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - LICENSE
64
+ - README.md
65
+ - bin/anystyle
66
+ - lib/anystyle/cli.rb
67
+ - lib/anystyle/cli/commands/base.rb
68
+ - lib/anystyle/cli/commands/find.rb
69
+ - lib/anystyle/cli/commands/parse.rb
70
+ - lib/anystyle/cli/version.rb
71
+ homepage: http://anystyle.io
72
+ licenses:
73
+ - BSD-2-Clause
74
+ metadata: {}
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '2.3'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 2.7.4
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: AnyStyle CLI
95
+ test_files: []