anystyle-cli 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c3c832096f90a3cc1f233ecdd4f500c2bb63460cc733142f764616cd33fc95f9
4
+ data.tar.gz: 11d151afafd488b955ba8f51863e60497007daf11fa192ec608769b9a670ac6e
5
+ SHA512:
6
+ metadata.gz: 783c9e5ec5dcf5d456fda3d1a0db502da61670e525dff9927f0af3f1f3c120f8929fb47006173a0c5b2ce70925043811a4ff65d42913b68f69d3af7ad87a4425
7
+ data.tar.gz: a1a737c36024f02252458a16c25f212594d658b5b825876b8c358c73e9b2b72c8c1010261c732a6fe1382bb2021787576d0552e5291386912a62eaaca0370774
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2018, Sylvester Keil
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ * Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ * Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.md ADDED
@@ -0,0 +1,119 @@
1
+ AnyStyle Command Line Interface
2
+ ===============================
3
+
4
+ anystyle --help
5
+ ---------------
6
+ NAME
7
+ anystyle - Finds and parses bibliographic references
8
+
9
+ SYNOPSIS
10
+ anystyle [global options] command [command options] [arguments...]
11
+
12
+ VERSION
13
+ 1.0.0 (cli 1.0.0, data 1.2.0)
14
+
15
+ GLOBAL OPTIONS
16
+ --adapter=name - Set the dictionary adapter (default: ruby)
17
+ -f, --format=name - Set the output format (default: ["json"])
18
+ --help - Show this message
19
+ --[no-]stdout - Print results directly to stdout
20
+ --[no-]verbose - Print status messages to stderr
21
+ --version - Display the program version
22
+ -w, --[no-]overwrite - Allow overwriting existing files
23
+
24
+ COMMANDS
25
+ find - Find and extract references from text documents
26
+ help - Shows a list of commands or help for one command
27
+ license - Print license information
28
+ parse - Parse and convert references
29
+
30
+ anystyle help find
31
+ ------------------
32
+ NAME
33
+ find - Find and extract references from text documents
34
+
35
+ SYNOPSIS
36
+ anystyle [global options] find [command options] input [output]
37
+
38
+ DESCRIPTION
39
+ This manual page documents the AnyStyle `find' command. AnyStyle `find'
40
+ analyzes PDF or text documents and extracts all references it finds.
41
+
42
+ The input argument can be a single PDF or text document, or a folder
43
+ containing multiple documents. The (optional) output argument specifies
44
+ the folder where the results shall be saved; if no output folder is
45
+ specified, results will be saved in the folder containing the input.
46
+
47
+ AnyStyle `find' supports the following formats:
48
+ bib BibTeX (references only);
49
+ csl CSL/JSON (references only);
50
+ json AnyStyle JSON (references only);
51
+ ref One reference per line, suitable for parser input;
52
+ txt Plain text document;
53
+ ttx Tagged document format, used for training the finder model;
54
+ xml References only, XML, suitable for training the parser model.
55
+
56
+ You can specify multiple output formats, separated by commas.
57
+
58
+ Analyzing PDF documents currently depends on `pdftotext' which must be
59
+ installed separately.
60
+
61
+ EXAMPLES
62
+ anystyle -f csl,xml find thesis.pdf
63
+
64
+ Extract references from `thesis.pdf' and save them in `thesis.csl' and
65
+ `thesis.xml'.
66
+
67
+ anystyle -f bib find --no-layout thesis.pdf bib
68
+
69
+ Extract references from `thesis.pdf' in `no-layout' mode (e.g., use this
70
+ if your document uses a multi-column layout) and save them in BibTeX in
71
+ `./bib/thesis.bib'.
72
+
73
+ anystyle help parse
74
+ -------------------
75
+ COMMAND OPTIONS
76
+ --[no-]layout - Use layout mode for PDF text extraction (default: enabled)
77
+ NAME
78
+ parse - Parse and convert references
79
+
80
+ SYNOPSIS
81
+ anystyle [global options] parse input [output]
82
+
83
+ DESCRIPTION
84
+ This manual page documents the AnyStyle `parse' command. AnyStyle `parse'
85
+ segments references (one per line) and converts them into structured
86
+ formats.
87
+
88
+ The input argument can be a single text document containing one full
89
+ reference per line (blank lines will be ignored), or a folder containing
90
+ multiple documents. The (optional) output argument specifies
91
+ the folder where the results shall be saved; if no output folder is
92
+ specified, results will be saved in the folder containing the input.
93
+
94
+ AnyStyle `parse' supports the following formats:
95
+ bib BibTeX (normalized);
96
+ csl CSL/JSON (normalized);
97
+ json AnyStyle JSON (normalized);
98
+ ref One reference per line, suitable for parser input;
99
+ txt Same as `ref';
100
+ xml XML, suitable for training the parser model.
101
+
102
+ You can specify multiple output formats, separated by commas.
103
+
104
+ EXAMPLES
105
+ anystyle -f json,xml parse biblio.txt
106
+
107
+ Extract references from `biblio.txt' and save them in `biblio.json' and
108
+ `biblio.xml'.
109
+
110
+ anystyle --stdout -f csl parse input.txt
111
+
112
+ Extract references from `input.txt' and print them to STDOUT in CSL/JSON.
113
+
114
+ License
115
+ -------
116
+ Copyright 2011-2018 Sylvester Keil. All rights reserved.
117
+
118
+ AnyStyle is distributed under a BSD-style license.
119
+ See LICENSE for details.
data/bin/anystyle ADDED
@@ -0,0 +1,182 @@
1
#!/usr/bin/env ruby

# Command line front end for AnyStyle, declared with the GLI DSL.
# This section sets up the program metadata, the global switches and
# flags, and the hook that runs before every command.

require 'gli'
require 'anystyle/cli'

include GLI::App
include AnyStyle::CLI

program_desc 'Finds and parses bibliographic references'

# The version banner combines AnyStyle::VERSION, this CLI's VERSION and
# AnyStyle::Data::VERSION.
version "#{AnyStyle::VERSION} (cli #{VERSION}, data #{AnyStyle::Data::VERSION})"

subcommand_option_handling :normal
arguments :strict

wrap_help_text :verbatim

# Flags declared with `type: Array` receive a comma-separated list.
accept(Array) do |list|
  list.split(',')
end

#config_file '.anystyle'

switch 'verbose', desc: 'Print status messages to stderr'

switch %w[w overwrite], desc: 'Allow overwriting existing files'

switch 'stdout', desc: 'Print results directly to stdout'

flag 'adapter',
  desc: 'Set the dictionary adapter',
  arg_name: 'name',
  default_value: 'ruby',
  must_match: ['ruby', 'memory', 'gdbm']

flag %w[f format],
  desc: 'Set the output format',
  arg_name: 'name',
  type: Array,
  default_value: ['json'],
  must_match: /(bib|csl|json|ref|ttx|txt|xml)(,(bib|csl|json|ref|ttx|txt|xml))*/

# Before any command runs, store the selected adapter in the dictionary
# defaults.
pre do |global|
  AnyStyle::Dictionary.defaults[:adapter] = global[:adapter]
end
50
+
51
+
52
# `find` command: help text, positional arguments, the `--[no-]layout`
# switch, and the action that hands over to Commands::Find.
# The long description is printed verbatim (see `wrap_help_text`), so its
# text is kept flush left.
desc 'Find and extract references from text documents'
long_desc %{
This manual page documents the AnyStyle `find' command. AnyStyle `find'
analyzes PDF or text documents and extracts all references it finds.

The input argument can be a single PDF or text document, or a folder
containing multiple documents. The (optional) output argument specifies
the folder where the results shall be saved; if no output folder is
specified, results will be saved in the folder containing the input.

AnyStyle `find' supports the following formats:
bib BibTeX (references only);
csl CSL/JSON (references only);
json AnyStyle JSON (references only);
ref One reference per line, suitable for parser input;
txt Plain text document;
ttx Tagged document format, used for training the finder model;
xml References only, XML, suitable for training the parser model.

You can specify multiple output formats, separated by commas.

Analyzing PDF documents currently depends on `pdftotext' which must be
installed separately.

EXAMPLES
anystyle -f csl,xml find thesis.pdf

Extract references from `thesis.pdf' and save them in `thesis.csl' and
`thesis.xml'.

anystyle -f bib find --no-layout thesis.pdf bib

Extract references from `thesis.pdf' in `no-layout' mode (e.g., use this
if your document uses a multi-column layout) and save them in BibTeX in
`./bib/thesis.bib'.
}.lstrip

arg :input
arg :output, :optional
command :find do |cmd|
  cmd.switch 'layout',
    default_value: true,
    desc: 'Use layout mode for PDF text extraction'

  # opts are the global options, params the command options and args the
  # positional arguments (input and optional output).
  cmd.action do |opts, params, args|
    Commands::Find.new(opts).run(args, params)
  end
end
100
+
101
+
102
# `parse` command: help text, positional arguments, and the action that
# hands over to Commands::Parse. The long description is printed verbatim,
# so its text is kept flush left.
desc 'Parse and convert references'
long_desc %{
This manual page documents the AnyStyle `parse' command. AnyStyle `parse'
segments references (one per line) and converts them into structured
formats.

The input argument can be a single text document containing one full
reference per line (blank lines will be ignored), or a folder containing
multiple documents. The (optional) output argument specifies
the folder where the results shall be saved; if no output folder is
specified, results will be saved in the folder containing the input.

AnyStyle `parse' supports the following formats:
bib BibTeX (normalized);
csl CSL/JSON (normalized);
json AnyStyle JSON (normalized);
ref One reference per line, suitable for parser input;
txt Same as `ref';
xml XML, suitable for training the parser model.

You can specify multiple output formats, separated by commas.

EXAMPLES
anystyle -f json,xml parse biblio.txt

Extract references from `biblio.txt' and save them in `biblio.json' and
`biblio.xml'.

anystyle --stdout -f csl parse input.txt

Extract references from `input.txt' and print them to STDOUT in CSL/JSON.
}.lstrip

arg :input
arg :output, :optional
command :parse do |parse_cmd|
  # The handler receives global options, command options and positional
  # arguments, in that order.
  parse_cmd.action do |global, local, argv|
    Commands::Parse.new(global).run(argv, local)
  end
end
142
+
143
# `license` command: prints the AnyStyle copyright line (ending with the
# current year) followed by the Wapiti copyright and the license terms.
desc 'Print license information'
command :license do |cmd|
  cmd.action do
    current_year = Time.now.year
    puts 'AnyStyle.', "Copyright (C) 2011-#{current_year} Sylvester Keil."
    puts <<~LICENSE

      Wapiti.
      Copyright (C) 2009-2013 CNRS.

      All rights reserved.

      Redistribution and use in source and binary forms, with or without
      modification, are permitted provided that the following conditions are met:

      * Redistributions of source code must retain the above copyright notice, this
      list of conditions and the following disclaimer.

      * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

      THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
      IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
      MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
      EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
      INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
      BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
      OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
      NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
      EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    LICENSE
  end
end

# Hand the command line to GLI and use its result as the exit status.
exit run(ARGV)

# vim: syntax=ruby
@@ -0,0 +1,8 @@
1
+ require 'json'
2
+ require 'pathname'
3
+ require 'anystyle'
4
+
5
+ require 'anystyle/cli/version'
6
+ require 'anystyle/cli/commands/base'
7
+ require 'anystyle/cli/commands/find'
8
+ require 'anystyle/cli/commands/parse'
@@ -0,0 +1,114 @@
1
module AnyStyle
  module CLI
    module Commands
      # Common behaviour shared by the CLI commands: access to the global
      # options, thin wrappers around AnyStyle's finder and parser, output
      # formatting, and helpers for walking input paths and writing results.
      class Base
        attr_reader :options, :output_folder

        # options - the global options hash; this class reads the keys
        #           :verbose, :stdout, :overwrite and :format.
        def initialize(options)
          @options = options
        end

        # Entry point of a command; subclasses must override it.
        #
        # Raises NotImplementedError when called on a class that does not
        # provide its own implementation.
        def run(args, params)
          raise NotImplementedError,
            "#{self.class.name}#run is not implemented"
        end

        def verbose?
          !!options[:verbose]
        end

        def stdout?
          !!options[:stdout]
        end

        def overwrite?
          !!options[:overwrite]
        end

        # Yields each requested output format name.
        def each_format(&block)
          options[:format].each(&block)
        end

        # Runs AnyStyle.find on input, requesting the :wapiti format;
        # additional options are passed through unchanged.
        def find(input, **opts)
          AnyStyle.find(input, format: :wapiti, **opts)
        end

        # Runs AnyStyle.parse on input, requesting the :wapiti format.
        def parse(input)
          AnyStyle.parse(input, format: :wapiti)
        end

        # Converts dataset into a string in the output format fmt.
        #
        # Raises ArgumentError for a format name not handled here.
        def format(dataset, fmt)
          case fmt
          when 'bib'
            AnyStyle.parser.format_bibtex(dataset).to_s
          when 'csl'
            JSON.pretty_generate AnyStyle.parser.format_csl(dataset)
          when 'json'
            JSON.pretty_generate AnyStyle.parser.format_hash(dataset)
          when 'ref', 'txt'
            dataset.to_txt
          when 'xml'
            dataset.to_xml(indent: 2).to_s
          else
            raise ArgumentError, "format not supported: #{fmt}"
          end
        end

        # Returns path (a Pathname) with its extension replaced by
        # new_extname, which includes the leading dot.
        def extsub(path, new_extname)
          basename = path.basename(path.extname)
          path.dirname.join("#{basename}#{new_extname}")
        end

        # Re-roots path below the output folder, keeping its position
        # relative to base_path; returns path unchanged when no output
        # folder has been set.
        def transpose(path, base_path)
          if output_folder.nil?
            path
          else
            output_folder.join(path.relative_path_from(base_path))
          end
        end

        # Remembers path (expanded) as the output folder and makes sure it
        # is usable; a nil path leaves the current setting untouched.
        #
        # The checks run as ordinary statements rather than in an `ensure`
        # clause, so they can no longer mask an exception raised while the
        # path is being expanded. A missing folder is created together with
        # any missing parent folders.
        #
        # Raises ArgumentError if the path exists but is not a directory.
        def set_output_folder(path)
          @output_folder = Pathname.new(path).expand_path unless path.nil?

          unless @output_folder.nil?
            if @output_folder.exist?
              raise ArgumentError,
                "not a directory: #{path}" unless @output_folder.directory?
            else
              @output_folder.mkpath
            end
          end

          @output_folder
        end

        # Prints status messages to STDERR, but only in verbose mode.
        def say(*args)
          STDERR.print(*args) if verbose?
        end

        # Yields [file, base_path] for input. A directory yields each of
        # its direct children that is not itself a directory (no recursion);
        # a single file is yielded with its parent folder as base path.
        #
        # Raises ArgumentError if input does not exist.
        def walk(input)
          path = Pathname(input).expand_path
          raise ArgumentError, "path does not exist: #{input}" unless path.exist?

          if path.directory?
            path.each_child do |file|
              yield file, path unless file.directory?
            end
          else
            yield path, path.dirname
          end
        end

        # Prints content to STDOUT in stdout mode; otherwise writes it to
        # path, transposed into the output folder if one is set.
        #
        # Raises RuntimeError if the target exists and overwriting has not
        # been allowed.
        def write(content, path, base_path)
          if stdout?
            STDOUT.puts(content)
          else
            path = transpose(path, base_path)
            if !overwrite? && path.exist?
              raise RuntimeError,
                "file exists, use --overwrite to force saving: #{path}"
            end
            File.write path, content
          end
        end
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
module AnyStyle
  module CLI
    module Commands
      # Implements the `find` command: runs the finder over every input
      # document and writes the result once per requested output format.
      class Find < Base
        # args   - positional arguments: args[0] is the input path,
        #          args[1] the optional output folder.
        # params - command options; :layout is passed on to the finder.
        def run(args, params)
          set_output_folder args[1]
          walk args[0] do |path, base_path|
            say "Analyzing #{path.relative_path_from(base_path)} ..."
            # The path is passed as a plain string. The former call to
            # String#untaint is gone: taint tracking was deprecated in
            # Ruby 2.7 and the method was removed in Ruby 3.2, where the
            # call raised NoMethodError.
            doc = find(path.to_s, layout: params[:layout])
            ref = doc[0].references

            if ref.length == 0
              say "no references found.\n"
            else
              say "#{ref.length} references found.\n"
              # Parsed references, computed lazily and shared by all
              # formats that need them.
              dst = nil
              each_format do |fmt|
                case fmt
                when 'ttx'
                  say "Formatting document as #{fmt} ...\n"
                  res = doc.to_txt tagged: true
                when 'txt'
                  say "Formatting document as #{fmt} ...\n"
                  res = doc.to_txt tagged: false
                when 'ref'
                  say "Formatting references as #{fmt} ...\n"
                  res = ref.join("\n")
                else
                  say "Formatting references as #{fmt} ...\n"
                  dst ||= parse(ref.join("\n"))
                  res = format(dst, fmt)
                end

                out = extsub(path, ".#{fmt}")
                say "Writing #{out.relative_path_from(base_path)} ...\n"
                write res, out, base_path
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module AnyStyle
  module CLI
    module Commands
      # Implements the `parse` command: parses every input document and
      # writes the result once per requested output format.
      class Parse < Base
        # args   - positional arguments: args[0] is the input path,
        #          args[1] the optional output folder.
        # params - command options (not used by this command).
        def run(args, params)
          set_output_folder args[1]
          walk args[0] do |path, base_path|
            say "Parsing #{path.relative_path_from(base_path)} ..."
            # The path is passed as a plain string. The former call to
            # String#untaint is gone: taint tracking was deprecated in
            # Ruby 2.7 and the method was removed in Ruby 3.2, where the
            # call raised NoMethodError.
            dataset = parse(path.to_s)
            say "#{dataset.length} references found.\n"
            each_format do |fmt|
              say "Formatting references as #{fmt} ...\n"
              res = format(dataset, fmt)
              out = extsub(path, ".#{fmt}")
              say "Writing #{out.relative_path_from(base_path)} ...\n"
              write res, out, base_path
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module AnyStyle
  module CLI
    # Version string of the command line interface; frozen so it cannot be
    # modified in place.
    VERSION = "1.0.0".freeze
  end
end
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: anystyle-cli
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Sylvester Keil
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-06-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: anystyle
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bibtex-ruby
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '4.4'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '4.4'
41
+ - !ruby/object:Gem::Dependency
42
+ name: gli
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.17'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.17'
55
+ description: A command line interface to the AnyStyle Parser and Finder.
56
+ email:
57
+ - http://sylvester.keil.or.at
58
+ executables:
59
+ - anystyle
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - LICENSE
64
+ - README.md
65
+ - bin/anystyle
66
+ - lib/anystyle/cli.rb
67
+ - lib/anystyle/cli/commands/base.rb
68
+ - lib/anystyle/cli/commands/find.rb
69
+ - lib/anystyle/cli/commands/parse.rb
70
+ - lib/anystyle/cli/version.rb
71
+ homepage: http://anystyle.io
72
+ licenses:
73
+ - BSD-2-Clause
74
+ metadata: {}
75
+ post_install_message:
76
+ rdoc_options: []
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '2.3'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ requirements: []
90
+ rubyforge_project:
91
+ rubygems_version: 2.7.4
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: AnyStyle CLI
95
+ test_files: []