gnparser 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +70 -12
- data/lib/gnparser/client.rb +27 -10
- data/lib/gnparser/version.rb +1 -1
- data/lib/gnparser_pb.rb +2 -31
- data/lib/gnparser_services_pb.rb +1 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ca997f4abf937af1e72a922b235560328ce24cab715212f131cfad1faae0180d
|
4
|
+
data.tar.gz: c19783e6482bdf6f100355aaa1111a32363b88fc34f5012a56b99349c1f847ea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73c4aa417d996d80976afc931e8b6c67699cb388f61e34f5a1e872591a87ef6ebee76ac932fe5d70fe6cc4fee3f308a09be05dba34c86c051a80932bd7811f79
|
7
|
+
data.tar.gz: ab8df5a0a97c90bd7481d9a3e858e3fc2ff2ced99a597f5d1fe103f2111340d22fff206dcb508a4d0bdba5efbd818ad4f8fe092da5178861fa8cd9ea4bf49754
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# gnparser
|
2
2
|
|
3
|
-
gRPC client to [gnparser] project
|
3
|
+
gRPC client to [gnparser] project (Go version)
|
4
4
|
|
5
5
|
## Overview
|
6
6
|
|
@@ -15,21 +15,45 @@ gem install gnparser
|
|
15
15
|
|
16
16
|
## Usage
|
17
17
|
|
18
|
-
[Download][releases] the latest version of Go gnparser
|
19
|
-
on your local machine as a
|
18
|
+
[Download][releases] the latest version of Go `gnparser`, and start it
|
19
|
+
on your local machine as a gRPC server:
|
20
20
|
|
21
21
|
```bash
|
22
22
|
gnparser -g 8778
|
23
23
|
```
|
24
24
|
|
25
|
-
Now you will be able to use Ruby gnparser with its default settings.
|
25
|
+
Now you will be able to use Ruby `gnparser` with its default settings.
|
26
26
|
|
27
27
|
```ruby
|
28
28
|
require 'gnparser'
|
29
29
|
gnp = GNparser::Client.new
|
30
|
+
|
31
|
+
# To use different host and port:
|
32
|
+
gnp = GNparser::Client.new('parser.example.com', 1234)
|
33
|
+
gnp = GNparser::Client.new('parser.example.com', '1234')
|
34
|
+
# Use default 8778 port
|
35
|
+
gnp = GNparser::Client.new('parser.example.com')
|
36
|
+
# Use default 0.0.0.0 host
|
37
|
+
gnp = GNparser::Client.new('0.0.0.0', '1234')
|
38
|
+
```
|
39
|
+
|
40
|
+
### Versions
|
41
|
+
|
42
|
+
To see the version of the gem:
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
gnp.version
|
46
|
+
```
|
47
|
+
|
48
|
+
To see the version of the Go `gnparser` server:
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
ver = gnp.parser_version
|
52
|
+
puts ver.value
|
53
|
+
puts ver.build_time.size
|
30
54
|
```
|
31
55
|
|
32
|
-
### Output
|
56
|
+
### Output formats
|
33
57
|
|
34
58
|
The gem supports the following formats:
|
35
59
|
|
@@ -40,12 +64,22 @@ gem supports the following formats:
|
|
40
64
|
: ``Pretty`` JSON output.
|
41
65
|
|
42
66
|
**simple**
|
43
|
-
: Pipe-separated string where the
|
67
|
+
: Pipe-separated string where the fields are: id, verbatim name, canonical form,
|
44
68
|
extended canonical form, authorship, year, quality of parsing.
|
45
69
|
|
46
70
|
**debug**
|
47
71
|
: Abstract Syntax Tree of the parsed result.
|
48
72
|
|
73
|
+
### Option preserve_order
|
74
|
+
|
75
|
+
To speed up parsing, the parser normally executes several jobs in parallel, and
|
76
|
+
as a result the order in which results come back may differ from the input. It means
|
77
|
+
the user has to match each output's ``verbatim`` field against the list of input names.
|
78
|
+
|
79
|
+
If parsing speed is not a concern, you can use single-threaded parsing, which
|
80
|
+
guarantees that the order of output will be exactly the same as the order of
|
81
|
+
the input. For this purpose use ``preserve_order: true`` option.
|
82
|
+
|
49
83
|
### Parse one name
|
50
84
|
|
51
85
|
```ruby
|
@@ -57,13 +91,13 @@ puts res.error
|
|
57
91
|
For non-default format:
|
58
92
|
|
59
93
|
```ruby
|
60
|
-
res = gnp.parse('Puma concolor (Linn.)', :pretty)
|
94
|
+
res = gnp.parse('Puma concolor (Linn.)', format: :pretty)
|
61
95
|
...
|
62
|
-
res = gnp.parse('Puma concolor (Linn.)', 'pretty')
|
96
|
+
res = gnp.parse('Puma concolor (Linn.)', format: 'pretty')
|
63
97
|
...
|
64
|
-
res = gnp.parse('Puma concolor (Linn.)', 'simple')
|
98
|
+
res = gnp.parse('Puma concolor (Linn.)', format: 'simple')
|
65
99
|
...
|
66
|
-
res = gnp.parse('Puma concolor (Linn.)', :simple)
|
100
|
+
res = gnp.parse('Puma concolor (Linn.)', format: :simple)
|
67
101
|
...
|
68
102
|
```
|
69
103
|
|
@@ -71,7 +105,20 @@ res = gnp.parse('Puma concolor (Linn.)', :simple)
|
|
71
105
|
|
72
106
|
```ruby
|
73
107
|
names = ['Plantago major L.', 'Homo sapiens Linn. 1758', 'Bubo bubo']
|
74
|
-
|
108
|
+
|
109
|
+
# fast, might get output in different order from input
|
110
|
+
res = gnp.parse_ary(names, format: :pretty)
|
111
|
+
res.each do |r|
|
112
|
+
puts r.value
|
113
|
+
puts r.error
|
114
|
+
end
|
115
|
+
|
116
|
+
# slower, returns the same order for output as it was for input
|
117
|
+
results = []
|
118
|
+
res = gnp.parse_ary(names, format: :pretty, preserve_order: true)
|
119
|
+
res.each_with_index do |r, i|
|
120
|
+
results << { input: names[i], output: r }
|
121
|
+
end
|
75
122
|
```
|
76
123
|
|
77
124
|
### Parse names from a file
|
@@ -80,7 +127,18 @@ File should have one name string per line.
|
|
80
127
|
|
81
128
|
```ruby
|
82
129
|
path = File.join(__dir__, "path", "to", "names.txt")
|
83
|
-
res = gnp.parse_file(path, :compact)
|
130
|
+
res = gnp.parse_file(path, format: :compact)
|
131
|
+
res.each do |r|
|
132
|
+
puts r.value
|
133
|
+
puts r.error
|
134
|
+
end
|
135
|
+
|
136
|
+
# preserving order of items in output
|
137
|
+
results = []
|
138
|
+
res = gnp.parse_file(path, format: :compact, preserve_order: true)
|
139
|
+
res.each_with_index do |r, i|
|
140
|
+
results << { input: names[i], output: r }
|
141
|
+
end
|
84
142
|
```
|
85
143
|
|
86
144
|
[gnparser]: https://gitlab.com/gogna/gnparser
|
data/lib/gnparser/client.rb
CHANGED
@@ -9,29 +9,46 @@ module GNparser
|
|
9
9
|
}.freeze
|
10
10
|
# GNparser::Client connects to a gnparser gRPC server
|
11
11
|
class Client
|
12
|
+
PARSER_MIN_VERSION = 'v0.6.0'
|
13
|
+
|
12
14
|
def initialize(host = '0.0.0.0', port = '8778')
|
13
15
|
@stub = Grpc::GNparser::Stub.new("#{host}:#{port}",
|
14
16
|
:this_channel_is_insecure)
|
17
|
+
return if parser_version.value >= PARSER_MIN_VERSION
|
18
|
+
|
19
|
+
raise 'gRPC server of gnparser should be at least ' \
|
20
|
+
' #{PARSER_MIN_VERSION}.\n Download latest version from ' \
|
21
|
+
'https://gitlab/gogna/gnparser/releases.'
|
15
22
|
end
|
16
23
|
|
17
|
-
def
|
24
|
+
def parser_version
|
18
25
|
@stub.ver(Grpc::Void.new)
|
19
26
|
end
|
20
27
|
|
21
|
-
def parse(name,
|
22
|
-
|
23
|
-
@stub.parse(enum.each_item).next
|
28
|
+
def parse(name, opts = {})
|
29
|
+
parse_iter([name], opts).next
|
24
30
|
end
|
25
31
|
|
26
|
-
def parse_ary(ary,
|
27
|
-
|
28
|
-
@stub.parse(enum.each_item)
|
32
|
+
def parse_ary(ary, opts = {})
|
33
|
+
parse_iter(ary, opts)
|
29
34
|
end
|
30
35
|
|
31
|
-
def parse_file(path,
|
36
|
+
def parse_file(path, opts = {})
|
32
37
|
f = File.open(path)
|
33
|
-
|
34
|
-
|
38
|
+
parse_iter(f, opts)
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def parse_iter(iter, opts)
|
44
|
+
format = opts[:format] || :compact
|
45
|
+
preserve_order = opts[:preserve_order]
|
46
|
+
enum = InputEnum.new(iter, format)
|
47
|
+
if preserve_order
|
48
|
+
@stub.parse_in_order(enum.each_item)
|
49
|
+
else
|
50
|
+
@stub.parse(enum.each_item)
|
51
|
+
end
|
35
52
|
end
|
36
53
|
end
|
37
54
|
|
data/lib/gnparser/version.rb
CHANGED
data/lib/gnparser_pb.rb
CHANGED
@@ -16,35 +16,9 @@ Google::Protobuf::DescriptorPool.generated_pool.build do
|
|
16
16
|
optional :name, :string, 2
|
17
17
|
end
|
18
18
|
end
|
19
|
-
add_message "grpc.Name" do
|
20
|
-
optional :parsed, :bool, 1
|
21
|
-
optional :quality, :int32, 2
|
22
|
-
optional :verbatim, :string, 3
|
23
|
-
optional :normalized, :string, 4
|
24
|
-
optional :canonical_simple, :string, 5
|
25
|
-
optional :canonical_full, :string, 6
|
26
|
-
optional :genus, :message, 7, "grpc.Entry"
|
27
|
-
optional :subgenus, :message, 8, "grpc.Entry"
|
28
|
-
optional :species, :message, 9, "grpc.Entry"
|
29
|
-
optional :subspicies, :message, 10, "grpc.Entry"
|
30
|
-
optional :variety, :message, 11, "grpc.Entry"
|
31
|
-
optional :form, :message, 12, "grpc.Entry"
|
32
|
-
end
|
33
|
-
add_message "grpc.Entry" do
|
34
|
-
optional :value, :string, 1
|
35
|
-
optional :norm_value, :string, 2
|
36
|
-
optional :position, :message, 3, "grpc.Pos"
|
37
|
-
optional :authors, :string, 4
|
38
|
-
optional :year, :string, 5
|
39
|
-
optional :score, :string, 6
|
40
|
-
end
|
41
|
-
add_message "grpc.Pos" do
|
42
|
-
optional :start, :int32, 1
|
43
|
-
optional :end, :int32, 2
|
44
|
-
end
|
45
19
|
add_message "grpc.Output" do
|
46
|
-
optional :value, :string,
|
47
|
-
optional :error, :string,
|
20
|
+
optional :value, :string, 2
|
21
|
+
optional :error, :string, 3
|
48
22
|
end
|
49
23
|
add_enum "grpc.Format" do
|
50
24
|
value :Compact, 0
|
@@ -58,9 +32,6 @@ module Grpc
|
|
58
32
|
Version = Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Version").msgclass
|
59
33
|
Void = Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Void").msgclass
|
60
34
|
Input = Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Input").msgclass
|
61
|
-
Name = Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Name").msgclass
|
62
|
-
Entry = Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Entry").msgclass
|
63
|
-
Pos = Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Pos").msgclass
|
64
35
|
Output = Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Output").msgclass
|
65
36
|
Format = Google::Protobuf::DescriptorPool.generated_pool.lookup("grpc.Format").enummodule
|
66
37
|
end
|
data/lib/gnparser_services_pb.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gnparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-01-
|
11
|
+
date: 2019-01-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: grpc
|