ruby-spacy 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +24 -7
- data/Gemfile +1 -1
- data/README.md +120 -22
- data/lib/ruby-spacy/openai_client.rb +149 -0
- data/lib/ruby-spacy/version.rb +1 -1
- data/lib/ruby-spacy.rb +215 -101
- data/ruby-spacy.gemspec +2 -2
- metadata +18 -20
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d6005c638c2b268fe162b288e124439be6a525952557a48b0b50685bbd2a6ea1
|
|
4
|
+
data.tar.gz: 41dbc057c9ec51ffa8d6f1149fb8acde3fb52a251299d0209b4e2d351942eac0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5be0efa9e649b3d46da859472ce403adaa3cdaa34d4158e7a531680eb2830ae64779ec6ada8f0f6e324cc9cb314fb1fcbc617daa26e37e91a7d14f703caeec2d
|
|
7
|
+
data.tar.gz: b8f56b4842fea3bec1b35366624c7ab9297c3a3b25c9a8502dc32c623593e511d9da538bf3e5cac272baf854cf4c2c97d4129790b492329183d88873467f8dbb
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,17 +1,34 @@
|
|
|
1
1
|
# Change Log
|
|
2
2
|
|
|
3
|
+
## 0.3.0 - 2025-01-06
|
|
4
|
+
### Added
|
|
5
|
+
- Ruby 4.0 support
|
|
6
|
+
- `Doc#to_bytes` for serializing documents to binary format
|
|
7
|
+
- `Doc.from_bytes` for restoring documents from binary data
|
|
8
|
+
- `PhraseMatcher` class for efficient phrase matching
|
|
9
|
+
- `Language#phrase_matcher` helper method
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
- Replaced `ruby-openai` gem with custom `OpenAIClient` implementation
|
|
13
|
+
- Updated default OpenAI model to `gpt-5-mini`
|
|
14
|
+
- Updated embeddings model to `text-embedding-3-small`
|
|
15
|
+
- Changed `max_tokens` parameter to `max_completion_tokens` (backward compatible)
|
|
16
|
+
- Added `fiddle` gem dependency (required for Ruby 4.0)
|
|
17
|
+
|
|
18
|
+
## 0.2.4 - 2024-12-11
|
|
19
|
+
### Changed
|
|
20
|
+
- Timeout and retry feature for `Spacy::Language.new`
|
|
21
|
+
|
|
3
22
|
## 0.2.3 - 2024-08-27
|
|
4
23
|
- Timeout option added to `Spacy::Language.new`
|
|
5
|
-
- Default
|
|
6
|
-
|
|
7
|
-
## 0.2.0 - 2022-10-02
|
|
8
|
-
- spaCy 3.7.0 supported
|
|
24
|
+
- Default OpenAI models updated to `gpt-4o-mini`
|
|
9
25
|
|
|
10
26
|
## 0.2.0 - 2022-10-02
|
|
11
27
|
### Added
|
|
12
|
-
-
|
|
13
|
-
- `Doc
|
|
14
|
-
- `Doc
|
|
28
|
+
- spaCy 3.7.0 supported
|
|
29
|
+
- `Doc#openai_query`
|
|
30
|
+
- `Doc#openai_completion`
|
|
31
|
+
- `Doc#openai_embeddings`
|
|
15
32
|
|
|
16
33
|
## 0.1.4.1 - 2021-07-06
|
|
17
34
|
- Test code refined
|
data/Gemfile
CHANGED
|
@@ -5,9 +5,9 @@ source "https://rubygems.org"
|
|
|
5
5
|
# Specify your gem's dependencies in ruby-spacy.gemspec
|
|
6
6
|
gemspec
|
|
7
7
|
|
|
8
|
+
gem "fiddle" # Required for Ruby 4.0+ (moved from default to bundled gem)
|
|
8
9
|
gem "numpy"
|
|
9
10
|
gem "pycall", "~> 1.5.1"
|
|
10
|
-
gem "ruby-openai"
|
|
11
11
|
gem "terminal-table"
|
|
12
12
|
|
|
13
13
|
group :development do
|
data/README.md
CHANGED
|
@@ -13,10 +13,11 @@
|
|
|
13
13
|
| ✅ | Access to pre-trained word vectors |
|
|
14
14
|
| ✅ | OpenAI Chat/Completion/Embeddings API integration |
|
|
15
15
|
|
|
16
|
-
Current Version: `0.
|
|
16
|
+
Current Version: `0.3.0`
|
|
17
17
|
|
|
18
|
-
-
|
|
19
|
-
-
|
|
18
|
+
- Ruby 4.0 supported
|
|
19
|
+
- spaCy 3.8 supported
|
|
20
|
+
- OpenAI GPT-5 API integration
|
|
20
21
|
|
|
21
22
|
## Installation of Prerequisites
|
|
22
23
|
|
|
@@ -522,12 +523,73 @@ Output:
|
|
|
522
523
|
| 9 | アルザス | 0.5644999742507935 |
|
|
523
524
|
| 10 | 南仏 | 0.5547999739646912 |
|
|
524
525
|
|
|
526
|
+
### PhraseMatcher
|
|
527
|
+
|
|
528
|
+
`PhraseMatcher` is more efficient than `Matcher` for matching large terminology lists. It's ideal for extracting known entities like product names, company names, or domain-specific terms.
|
|
529
|
+
|
|
530
|
+
**Basic usage:**
|
|
531
|
+
|
|
532
|
+
```ruby
|
|
533
|
+
require "ruby-spacy"
|
|
534
|
+
|
|
535
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
|
536
|
+
|
|
537
|
+
# Create a phrase matcher
|
|
538
|
+
matcher = nlp.phrase_matcher
|
|
539
|
+
matcher.add("PRODUCT", ["iPhone", "MacBook Pro", "iPad"])
|
|
540
|
+
|
|
541
|
+
doc = nlp.read("I bought an iPhone and a MacBook Pro yesterday.")
|
|
542
|
+
matches = matcher.match(doc)
|
|
543
|
+
|
|
544
|
+
matches.each do |span|
|
|
545
|
+
puts "#{span.text} => #{span.label}"
|
|
546
|
+
end
|
|
547
|
+
# => iPhone => PRODUCT
|
|
548
|
+
# => MacBook Pro => PRODUCT
|
|
549
|
+
```
|
|
550
|
+
|
|
551
|
+
**Case-insensitive matching:**
|
|
552
|
+
|
|
553
|
+
```ruby
|
|
554
|
+
# Use attr: "LOWER" for case-insensitive matching
|
|
555
|
+
matcher = nlp.phrase_matcher(attr: "LOWER")
|
|
556
|
+
matcher.add("COMPANY", ["apple", "google", "microsoft"])
|
|
557
|
+
|
|
558
|
+
doc = nlp.read("Apple and GOOGLE are competitors of Microsoft.")
|
|
559
|
+
matches = matcher.match(doc)
|
|
560
|
+
|
|
561
|
+
matches.each do |span|
|
|
562
|
+
puts span.text
|
|
563
|
+
end
|
|
564
|
+
# => Apple
|
|
565
|
+
# => GOOGLE
|
|
566
|
+
# => Microsoft
|
|
567
|
+
```
|
|
568
|
+
|
|
569
|
+
**Multiple categories:**
|
|
570
|
+
|
|
571
|
+
```ruby
|
|
572
|
+
matcher = nlp.phrase_matcher(attr: "LOWER")
|
|
573
|
+
matcher.add("TECH_COMPANY", ["apple", "google", "microsoft", "amazon"])
|
|
574
|
+
matcher.add("PRODUCT", ["iphone", "pixel", "surface", "kindle"])
|
|
575
|
+
|
|
576
|
+
doc = nlp.read("Apple released the new iPhone while Google announced Pixel updates.")
|
|
577
|
+
matches = matcher.match(doc)
|
|
578
|
+
|
|
579
|
+
matches.each do |span|
|
|
580
|
+
puts "#{span.text}: #{span.label}"
|
|
581
|
+
end
|
|
582
|
+
# => Apple: TECH_COMPANY
|
|
583
|
+
# => iPhone: PRODUCT
|
|
584
|
+
# => Google: TECH_COMPANY
|
|
585
|
+
# => Pixel: PRODUCT
|
|
586
|
+
```
|
|
525
587
|
|
|
526
588
|
## OpenAI API Integration
|
|
527
589
|
|
|
528
|
-
> ⚠️ This feature
|
|
590
|
+
> ⚠️ This feature requires GPT-5 series models. Please refer to OpenAI's [API reference](https://platform.openai.com/docs/api-reference) for details. Note: GPT-5 models do not support the `temperature` parameter.
|
|
529
591
|
|
|
530
|
-
Easily leverage GPT models within ruby-spacy by using an OpenAI API key. When constructing prompts for the `Doc::openai_query` method, you can incorporate the following token properties of the document. These properties are retrieved through
|
|
592
|
+
Easily leverage GPT models within ruby-spacy by using an OpenAI API key. When constructing prompts for the `Doc::openai_query` method, you can incorporate the following token properties of the document. These properties are retrieved through tool calls (made internally by GPT when necessary) and seamlessly integrated into your prompt. The available properties include:
|
|
531
593
|
|
|
532
594
|
- `surface`
|
|
533
595
|
- `lemma`
|
|
@@ -550,9 +612,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
|
550
612
|
doc = nlp.read("The Beatles released 12 studio albums")
|
|
551
613
|
|
|
552
614
|
# default parameter values
|
|
553
|
-
#
|
|
554
|
-
#
|
|
555
|
-
# model: "gpt-4o-mini"
|
|
615
|
+
# max_completion_tokens: 1000
|
|
616
|
+
# model: "gpt-5-mini"
|
|
556
617
|
res1 = doc.openai_query(
|
|
557
618
|
access_token: api_key,
|
|
558
619
|
prompt: "Translate the text to Japanese."
|
|
@@ -576,9 +637,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
|
576
637
|
doc = nlp.read("The Beatles were an English rock band formed in Liverpool in 1960.")
|
|
577
638
|
|
|
578
639
|
# default parameter values
|
|
579
|
-
#
|
|
580
|
-
#
|
|
581
|
-
# model: "gpt-4o-mini"
|
|
640
|
+
# max_completion_tokens: 1000
|
|
641
|
+
# model: "gpt-5-mini"
|
|
582
642
|
res = doc.openai_query(
|
|
583
643
|
access_token: api_key,
|
|
584
644
|
prompt: "Extract the topic of the document and list 10 entities (names, concepts, locations, etc.) that are relevant to the topic."
|
|
@@ -614,9 +674,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
|
614
674
|
doc = nlp.read("The Beatles released 12 studio albums")
|
|
615
675
|
|
|
616
676
|
# default parameter values
|
|
617
|
-
#
|
|
618
|
-
#
|
|
619
|
-
# model: "gpt-4o-mini"
|
|
677
|
+
# max_completion_tokens: 1000
|
|
678
|
+
# model: "gpt-5-mini"
|
|
620
679
|
res = doc.openai_query(
|
|
621
680
|
access_token: api_key,
|
|
622
681
|
prompt: "List token data of each of the words used in the sentence. Add 'meaning' property and value (brief semantic definition) to each token data. Output as a JSON object."
|
|
@@ -692,7 +751,7 @@ Output:
|
|
|
692
751
|
}
|
|
693
752
|
```
|
|
694
753
|
|
|
695
|
-
### GPT Prompting (Generate a
|
|
754
|
+
### GPT Prompting (Generate a Syntax Tree using Token Properties)
|
|
696
755
|
|
|
697
756
|
Ruby code:
|
|
698
757
|
|
|
@@ -704,11 +763,10 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
|
704
763
|
doc = nlp.read("The Beatles released 12 studio albums")
|
|
705
764
|
|
|
706
765
|
# default parameter values
|
|
707
|
-
#
|
|
708
|
-
#
|
|
766
|
+
# max_completion_tokens: 1000
|
|
767
|
+
# model: "gpt-5-mini"
|
|
709
768
|
res = doc.openai_query(
|
|
710
769
|
access_token: api_key,
|
|
711
|
-
model: "gpt-4",
|
|
712
770
|
prompt: "Generate a tree diagram from the text using given token data. Use the following bracketing style: [S [NP [Det the] [N cat]] [VP [V sat] [PP [P on] [NP the mat]]]"
|
|
713
771
|
)
|
|
714
772
|
puts res
|
|
@@ -747,9 +805,8 @@ nlp = Spacy::Language.new("en_core_web_sm")
|
|
|
747
805
|
doc = nlp.read("Vladimir Nabokov was a")
|
|
748
806
|
|
|
749
807
|
# default parameter values
|
|
750
|
-
#
|
|
751
|
-
#
|
|
752
|
-
# model: "gpt-4o-mini"
|
|
808
|
+
# max_completion_tokens: 1000
|
|
809
|
+
# model: "gpt-5-mini"
|
|
753
810
|
res = doc.openai_completion(access_token: api_key)
|
|
754
811
|
puts res
|
|
755
812
|
```
|
|
@@ -769,7 +826,7 @@ api_key = ENV["OPENAI_API_KEY"]
|
|
|
769
826
|
nlp = Spacy::Language.new("en_core_web_sm")
|
|
770
827
|
doc = nlp.read("Vladimir Nabokov was a Russian-American novelist, poet, translator and entomologist.")
|
|
771
828
|
|
|
772
|
-
# default model: text-embedding-
|
|
829
|
+
# default model: text-embedding-3-small
|
|
773
830
|
res = doc.openai_embeddings(access_token: api_key)
|
|
774
831
|
|
|
775
832
|
puts res
|
|
@@ -796,6 +853,47 @@ You can set a timeout for the `Spacy::Language.new` method:
|
|
|
796
853
|
nlp = Spacy::Language.new("en_core_web_sm", timeout: 120) # Set timeout to 120 seconds
|
|
797
854
|
```
|
|
798
855
|
|
|
856
|
+
### Document Serialization
|
|
857
|
+
|
|
858
|
+
You can serialize processed documents to binary format for caching or storage. This is useful when you want to avoid re-processing the same text multiple times.
|
|
859
|
+
|
|
860
|
+
**Saving a document:**
|
|
861
|
+
|
|
862
|
+
```ruby
|
|
863
|
+
require "ruby-spacy"
|
|
864
|
+
|
|
865
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
|
866
|
+
doc = nlp.read("Apple Inc. was founded by Steve Jobs in California.")
|
|
867
|
+
|
|
868
|
+
# Serialize to binary
|
|
869
|
+
bytes = doc.to_bytes
|
|
870
|
+
|
|
871
|
+
# Save to file
|
|
872
|
+
File.binwrite("doc_cache.bin", bytes)
|
|
873
|
+
```
|
|
874
|
+
|
|
875
|
+
**Restoring a document:**
|
|
876
|
+
|
|
877
|
+
```ruby
|
|
878
|
+
nlp = Spacy::Language.new("en_core_web_sm")
|
|
879
|
+
|
|
880
|
+
# Load from file
|
|
881
|
+
bytes = File.binread("doc_cache.bin")
|
|
882
|
+
|
|
883
|
+
# Restore the document (all annotations are preserved)
|
|
884
|
+
restored_doc = Spacy::Doc.from_bytes(nlp, bytes)
|
|
885
|
+
|
|
886
|
+
puts restored_doc.text
|
|
887
|
+
# => "Apple Inc. was founded by Steve Jobs in California."
|
|
888
|
+
|
|
889
|
+
restored_doc.ents.each do |ent|
|
|
890
|
+
puts "#{ent.text} (#{ent.label})"
|
|
891
|
+
end
|
|
892
|
+
# => Apple Inc. (ORG)
|
|
893
|
+
# => Steve Jobs (PERSON)
|
|
894
|
+
# => California (GPE)
|
|
895
|
+
```
|
|
896
|
+
|
|
799
897
|
## Author
|
|
800
898
|
|
|
801
899
|
Yoichiro Hasebe [<yohasebe@gmail.com>]
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "openssl"
|
|
5
|
+
require "uri"
|
|
6
|
+
require "json"
|
|
7
|
+
|
|
8
|
+
module Spacy
|
|
9
|
+
# A lightweight OpenAI API client with tools support for GPT-5 series models.
|
|
10
|
+
# This client implements the chat completions and embeddings endpoints
|
|
11
|
+
# without external dependencies.
|
|
12
|
+
class OpenAIClient
|
|
13
|
+
API_ENDPOINT = "https://api.openai.com/v1"
|
|
14
|
+
DEFAULT_TIMEOUT = 120
|
|
15
|
+
MAX_RETRIES = 3
|
|
16
|
+
RETRY_DELAY = 1
|
|
17
|
+
|
|
18
|
+
class APIError < StandardError
|
|
19
|
+
attr_reader :status_code, :response_body
|
|
20
|
+
|
|
21
|
+
def initialize(message, status_code: nil, response_body: nil)
|
|
22
|
+
@status_code = status_code
|
|
23
|
+
@response_body = response_body
|
|
24
|
+
super(message)
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def initialize(access_token:, timeout: DEFAULT_TIMEOUT)
|
|
29
|
+
@access_token = access_token
|
|
30
|
+
@timeout = timeout
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Sends a chat completion request with optional tools support.
|
|
34
|
+
# Note: GPT-5 series models do not support the temperature parameter.
|
|
35
|
+
#
|
|
36
|
+
# @param model [String] The model to use (e.g., "gpt-5-mini")
|
|
37
|
+
# @param messages [Array<Hash>] The conversation messages
|
|
38
|
+
# @param max_completion_tokens [Integer] Maximum tokens in the response
|
|
39
|
+
# @param temperature [Float, nil] Sampling temperature (ignored for GPT-5 models)
|
|
40
|
+
# @param tools [Array<Hash>, nil] Tool definitions for function calling
|
|
41
|
+
# @param tool_choice [String, Hash, nil] Tool selection strategy
|
|
42
|
+
# @return [Hash] The API response
|
|
43
|
+
def chat(model:, messages:, max_completion_tokens: 1000, temperature: nil, tools: nil, tool_choice: nil)
|
|
44
|
+
body = {
|
|
45
|
+
model: model,
|
|
46
|
+
messages: messages,
|
|
47
|
+
max_completion_tokens: max_completion_tokens
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
# GPT-5 series models do not support temperature parameter
|
|
51
|
+
unless gpt5_model?(model)
|
|
52
|
+
body[:temperature] = temperature || 0.7
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
if tools && !tools.empty?
|
|
56
|
+
body[:tools] = tools
|
|
57
|
+
body[:tool_choice] = tool_choice || "auto"
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
post("/chat/completions", body)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Checks if the model is a GPT-5 series model.
|
|
64
|
+
# GPT-5 models have different parameter requirements (no temperature support).
|
|
65
|
+
def gpt5_model?(model)
|
|
66
|
+
model.to_s.start_with?("gpt-5")
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Sends an embeddings request.
|
|
70
|
+
#
|
|
71
|
+
# @param model [String] The embeddings model (e.g., "text-embedding-3-small")
|
|
72
|
+
# @param input [String] The text to embed
|
|
73
|
+
# @return [Hash] The API response
|
|
74
|
+
def embeddings(model:, input:)
|
|
75
|
+
body = {
|
|
76
|
+
model: model,
|
|
77
|
+
input: input
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
post("/embeddings", body)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
private
|
|
84
|
+
|
|
85
|
+
# Creates a certificate store with system CA certificates but without CRL checking.
|
|
86
|
+
# This avoids "unable to get certificate CRL" errors on some systems.
|
|
87
|
+
def default_cert_store
|
|
88
|
+
store = OpenSSL::X509::Store.new
|
|
89
|
+
store.set_default_paths
|
|
90
|
+
store
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def post(path, body)
|
|
94
|
+
uri = URI.parse("#{API_ENDPOINT}#{path}")
|
|
95
|
+
retries = 0
|
|
96
|
+
|
|
97
|
+
begin
|
|
98
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
99
|
+
http.use_ssl = true
|
|
100
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
|
|
101
|
+
http.cert_store = default_cert_store
|
|
102
|
+
http.open_timeout = @timeout
|
|
103
|
+
http.read_timeout = @timeout
|
|
104
|
+
|
|
105
|
+
request = Net::HTTP::Post.new(uri.path)
|
|
106
|
+
request["Content-Type"] = "application/json"
|
|
107
|
+
request["Authorization"] = "Bearer #{@access_token}"
|
|
108
|
+
request.body = body.to_json
|
|
109
|
+
|
|
110
|
+
response = http.request(request)
|
|
111
|
+
|
|
112
|
+
handle_response(response)
|
|
113
|
+
rescue Net::OpenTimeout, Net::ReadTimeout => e
|
|
114
|
+
retries += 1
|
|
115
|
+
if retries <= MAX_RETRIES
|
|
116
|
+
sleep RETRY_DELAY
|
|
117
|
+
retry
|
|
118
|
+
end
|
|
119
|
+
raise APIError.new("Request timed out after #{MAX_RETRIES} retries: #{e.message}")
|
|
120
|
+
rescue Errno::ECONNREFUSED, Errno::ECONNRESET, SocketError => e
|
|
121
|
+
retries += 1
|
|
122
|
+
if retries <= MAX_RETRIES
|
|
123
|
+
sleep RETRY_DELAY
|
|
124
|
+
retry
|
|
125
|
+
end
|
|
126
|
+
raise APIError.new("Network error after #{MAX_RETRIES} retries: #{e.message}")
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def handle_response(response)
|
|
131
|
+
body = JSON.parse(response.body)
|
|
132
|
+
|
|
133
|
+
case response.code.to_i
|
|
134
|
+
when 200
|
|
135
|
+
body
|
|
136
|
+
when 400..499
|
|
137
|
+
error_message = body.dig("error", "message") || "Client error"
|
|
138
|
+
raise APIError.new(error_message, status_code: response.code.to_i, response_body: body)
|
|
139
|
+
when 500..599
|
|
140
|
+
error_message = body.dig("error", "message") || "Server error"
|
|
141
|
+
raise APIError.new(error_message, status_code: response.code.to_i, response_body: body)
|
|
142
|
+
else
|
|
143
|
+
raise APIError.new("Unexpected response: #{response.code}", status_code: response.code.to_i, response_body: body)
|
|
144
|
+
end
|
|
145
|
+
rescue JSON::ParserError
|
|
146
|
+
raise APIError.new("Invalid JSON response", status_code: response.code.to_i, response_body: response.body)
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
end
|
data/lib/ruby-spacy/version.rb
CHANGED
data/lib/ruby-spacy.rb
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "ruby-spacy/version"
|
|
4
|
+
require_relative "ruby-spacy/openai_client"
|
|
4
5
|
require "numpy"
|
|
5
|
-
require "openai"
|
|
6
6
|
require "pycall"
|
|
7
7
|
require "strscan"
|
|
8
8
|
require "timeout"
|
|
9
|
+
require "json"
|
|
9
10
|
|
|
10
11
|
begin
|
|
11
12
|
PyCall.init
|
|
@@ -39,6 +40,9 @@ module Spacy
|
|
|
39
40
|
# Python `Matcher` class object
|
|
40
41
|
PyMatcher = spacy.matcher.Matcher
|
|
41
42
|
|
|
43
|
+
# Python `PhraseMatcher` class object
|
|
44
|
+
PyPhraseMatcher = spacy.matcher.PhraseMatcher
|
|
45
|
+
|
|
42
46
|
# Python `displacy` object
|
|
43
47
|
PyDisplacy = PyCall.import_module('spacy.displacy')
|
|
44
48
|
|
|
@@ -49,18 +53,6 @@ module Spacy
|
|
|
49
53
|
PyCall::List.call(py_generator)
|
|
50
54
|
end
|
|
51
55
|
|
|
52
|
-
@openai_client = nil
|
|
53
|
-
|
|
54
|
-
def self.openai_client(access_token:)
|
|
55
|
-
# If @client is already set, just return it. Otherwise, create a new instance.
|
|
56
|
-
@openai_client ||= OpenAI::Client.new(access_token: access_token)
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# Provide an accessor method to get the client (optional)
|
|
60
|
-
def self.client
|
|
61
|
-
@openai_client
|
|
62
|
-
end
|
|
63
|
-
|
|
64
56
|
# See also spaCy Python API document for [`Doc`](https://spacy.io/api/doc).
|
|
65
57
|
class Doc
|
|
66
58
|
# @return [Object] a Python `Language` instance accessible via `PyCall`
|
|
@@ -216,6 +208,30 @@ module Spacy
|
|
|
216
208
|
py_doc.similarity(other.py_doc)
|
|
217
209
|
end
|
|
218
210
|
|
|
211
|
+
# Serializes the doc to a binary string.
|
|
212
|
+
# The binary data includes all annotations (tokens, entities, etc.) and can be
|
|
213
|
+
# used to restore the doc later without re-processing.
|
|
214
|
+
# @return [String] binary representation of the doc
|
|
215
|
+
# @example Save doc to file
|
|
216
|
+
# doc = nlp.read("Hello world")
|
|
217
|
+
# File.binwrite("doc.bin", doc.to_bytes)
|
|
218
|
+
def to_bytes
|
|
219
|
+
@py_doc.to_bytes.force_encoding(Encoding::BINARY)
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
# Restores a doc from binary data created by {#to_bytes}.
|
|
223
|
+
# This is useful for caching processed documents to avoid re-processing.
|
|
224
|
+
# @param byte_string [String] binary data from {#to_bytes}
|
|
225
|
+
# @return [Doc] the restored doc
|
|
226
|
+
# @example Load doc from file
|
|
227
|
+
# bytes = File.binread("doc.bin")
|
|
228
|
+
# doc = Spacy::Doc.from_bytes(nlp, bytes)
|
|
229
|
+
def self.from_bytes(nlp, byte_string)
|
|
230
|
+
py_bytes = PyCall.eval("bytes(#{byte_string.bytes})")
|
|
231
|
+
py_doc = nlp.py_nlp.call("").from_bytes(py_bytes)
|
|
232
|
+
new(nlp.py_nlp, py_doc: py_doc)
|
|
233
|
+
end
|
|
234
|
+
|
|
219
235
|
# Visualize the document in one of two styles: "dep" (dependencies) or "ent" (named entities).
|
|
220
236
|
# @param style [String] either `dep` or `ent`
|
|
221
237
|
# @param compact [Boolean] only relevant to the `dep' style
|
|
@@ -224,12 +240,26 @@ module Spacy
|
|
|
224
240
|
PyDisplacy.render(py_doc, style: style, options: { compact: compact }, jupyter: false)
|
|
225
241
|
end
|
|
226
242
|
|
|
243
|
+
# Sends a query to OpenAI's chat completion API with optional tool support.
|
|
244
|
+
# The get_tokens tool allows the model to request token-level linguistic analysis.
|
|
245
|
+
#
|
|
246
|
+
# @param access_token [String, nil] OpenAI API key (defaults to OPENAI_API_KEY env var)
|
|
247
|
+
# @param max_completion_tokens [Integer] Maximum tokens in the response
|
|
248
|
+
# @param max_tokens [Integer] Alias for max_completion_tokens (deprecated, for backward compatibility)
|
|
249
|
+
# @param temperature [Float] Sampling temperature (ignored for GPT-5 models)
|
|
250
|
+
# @param model [String] The model to use (default: gpt-5-mini)
|
|
251
|
+
# @param messages [Array<Hash>] Conversation history (for recursive tool calls)
|
|
252
|
+
# @param prompt [String, nil] System prompt for the query
|
|
253
|
+
# @return [String, nil] The model's response content
|
|
227
254
|
def openai_query(access_token: nil,
|
|
228
|
-
|
|
255
|
+
max_completion_tokens: nil,
|
|
256
|
+
max_tokens: nil,
|
|
229
257
|
temperature: 0.7,
|
|
230
|
-
model: "gpt-
|
|
258
|
+
model: "gpt-5-mini",
|
|
231
259
|
messages: [],
|
|
232
260
|
prompt: nil)
|
|
261
|
+
# Support both max_completion_tokens and max_tokens for backward compatibility
|
|
262
|
+
max_completion_tokens ||= max_tokens || 1000
|
|
233
263
|
if messages.empty?
|
|
234
264
|
messages = [
|
|
235
265
|
{ role: "system", content: prompt },
|
|
@@ -240,110 +270,134 @@ module Spacy
|
|
|
240
270
|
access_token ||= ENV["OPENAI_API_KEY"]
|
|
241
271
|
raise "Error: OPENAI_API_KEY is not set" unless access_token
|
|
242
272
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
"parameters": {
|
|
257
|
-
"type": "object",
|
|
258
|
-
"properties": {
|
|
259
|
-
"text": {
|
|
260
|
-
"type": "string",
|
|
261
|
-
"description": "text to be tokenized"
|
|
262
|
-
}
|
|
263
|
-
},
|
|
264
|
-
"required": ["text"]
|
|
273
|
+
# Tool definition for token analysis (GPT-5 tools API format)
|
|
274
|
+
tools = [
|
|
275
|
+
{
|
|
276
|
+
type: "function",
|
|
277
|
+
function: {
|
|
278
|
+
name: "get_tokens",
|
|
279
|
+
description: "Tokenize given text and return a list of tokens with their attributes: surface, lemma, tag, pos (part-of-speech), dep (dependency), ent_type (entity type), and morphology",
|
|
280
|
+
parameters: {
|
|
281
|
+
type: "object",
|
|
282
|
+
properties: {
|
|
283
|
+
text: {
|
|
284
|
+
type: "string",
|
|
285
|
+
description: "text to be tokenized"
|
|
265
286
|
}
|
|
266
|
-
}
|
|
267
|
-
|
|
287
|
+
},
|
|
288
|
+
required: ["text"]
|
|
289
|
+
}
|
|
268
290
|
}
|
|
269
|
-
|
|
291
|
+
}
|
|
292
|
+
]
|
|
293
|
+
|
|
294
|
+
client = OpenAIClient.new(access_token: access_token)
|
|
295
|
+
response = client.chat(
|
|
296
|
+
model: model,
|
|
297
|
+
messages: messages,
|
|
298
|
+
max_completion_tokens: max_completion_tokens,
|
|
299
|
+
temperature: temperature,
|
|
300
|
+
tools: tools,
|
|
301
|
+
tool_choice: "auto"
|
|
302
|
+
)
|
|
303
|
+
|
|
304
|
+
message = response.dig("choices", 0, "message")
|
|
270
305
|
|
|
271
|
-
|
|
306
|
+
# Handle tool calls (GPT-5 format)
|
|
307
|
+
if message["tool_calls"] && !message["tool_calls"].empty?
|
|
308
|
+
messages << message
|
|
309
|
+
|
|
310
|
+
message["tool_calls"].each do |tool_call|
|
|
311
|
+
function_name = tool_call.dig("function", "name")
|
|
312
|
+
tool_call_id = tool_call["id"]
|
|
272
313
|
|
|
273
|
-
if message["role"] == "assistant" && message["function_call"]
|
|
274
|
-
messages << message
|
|
275
|
-
function_name = message.dig("function_call", "name")
|
|
276
|
-
_args = JSON.parse(message.dig("function_call", "arguments"))
|
|
277
314
|
case function_name
|
|
278
315
|
when "get_tokens"
|
|
279
|
-
|
|
316
|
+
result = tokens.map do |t|
|
|
280
317
|
{
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
318
|
+
surface: t.text,
|
|
319
|
+
lemma: t.lemma,
|
|
320
|
+
pos: t.pos,
|
|
321
|
+
tag: t.tag,
|
|
322
|
+
dep: t.dep,
|
|
323
|
+
ent_type: t.ent_type,
|
|
324
|
+
morphology: t.morphology
|
|
288
325
|
}
|
|
289
326
|
end.to_json
|
|
327
|
+
|
|
328
|
+
messages << {
|
|
329
|
+
role: "tool",
|
|
330
|
+
tool_call_id: tool_call_id,
|
|
331
|
+
content: result
|
|
332
|
+
}
|
|
290
333
|
end
|
|
291
|
-
messages << { role: "system", content: res }
|
|
292
|
-
openai_query(access_token: access_token, max_tokens: max_tokens,
|
|
293
|
-
temperature: temperature, model: model,
|
|
294
|
-
messages: messages, prompt: prompt)
|
|
295
|
-
else
|
|
296
|
-
message["content"]
|
|
297
334
|
end
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
335
|
+
|
|
336
|
+
# Recursive call to get final response after tool execution
|
|
337
|
+
openai_query(
|
|
338
|
+
access_token: access_token,
|
|
339
|
+
max_completion_tokens: max_completion_tokens,
|
|
340
|
+
temperature: temperature,
|
|
341
|
+
model: model,
|
|
342
|
+
messages: messages,
|
|
343
|
+
prompt: prompt
|
|
344
|
+
)
|
|
345
|
+
else
|
|
346
|
+
message["content"]
|
|
302
347
|
end
|
|
303
|
-
|
|
348
|
+
rescue OpenAIClient::APIError => e
|
|
349
|
+
puts "Error: OpenAI API call failed - #{e.message}"
|
|
350
|
+
nil
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
# Sends a text completion request to OpenAI's chat API.
|
|
354
|
+
#
|
|
355
|
+
# @param access_token [String, nil] OpenAI API key (defaults to OPENAI_API_KEY env var)
|
|
356
|
+
# @param max_completion_tokens [Integer] Maximum tokens in the response
|
|
357
|
+
# @param max_tokens [Integer] Alias for max_completion_tokens (deprecated, for backward compatibility)
|
|
358
|
+
# @param temperature [Float] Sampling temperature (ignored for GPT-5 models)
|
|
359
|
+
# @param model [String] The model to use (default: gpt-5-mini)
|
|
360
|
+
# @return [String, nil] The completed text
|
|
361
|
+
def openai_completion(access_token: nil, max_completion_tokens: nil, max_tokens: nil, temperature: 0.7, model: "gpt-5-mini")
|
|
362
|
+
# Support both max_completion_tokens and max_tokens for backward compatibility
|
|
363
|
+
max_completion_tokens ||= max_tokens || 1000
|
|
304
364
|
|
|
305
|
-
def openai_completion(access_token: nil, max_tokens: 1000, temperature: 0.7, model: "gpt-4o-mini")
|
|
306
365
|
messages = [
|
|
307
366
|
{ role: "system", content: "Complete the text input by the user." },
|
|
308
367
|
{ role: "user", content: @text }
|
|
309
368
|
]
|
|
369
|
+
|
|
310
370
|
access_token ||= ENV["OPENAI_API_KEY"]
|
|
311
371
|
raise "Error: OPENAI_API_KEY is not set" unless access_token
|
|
312
372
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
373
|
+
client = OpenAIClient.new(access_token: access_token)
|
|
374
|
+
response = client.chat(
|
|
375
|
+
model: model,
|
|
376
|
+
messages: messages,
|
|
377
|
+
max_completion_tokens: max_completion_tokens,
|
|
378
|
+
temperature: temperature
|
|
379
|
+
)
|
|
380
|
+
response.dig("choices", 0, "message", "content")
|
|
381
|
+
rescue OpenAIClient::APIError => e
|
|
382
|
+
puts "Error: OpenAI API call failed - #{e.message}"
|
|
383
|
+
nil
|
|
384
|
+
end
|
|
385
|
+
|
|
386
|
+
# Generates text embeddings using OpenAI's embeddings API.
|
|
387
|
+
#
|
|
388
|
+
# @param access_token [String, nil] OpenAI API key (defaults to OPENAI_API_KEY env var)
|
|
389
|
+
# @param model [String] The embeddings model (default: text-embedding-3-small)
|
|
390
|
+
# @return [Array<Float>, nil] The embedding vector
|
|
391
|
+
def openai_embeddings(access_token: nil, model: "text-embedding-3-small")
|
|
331
392
|
access_token ||= ENV["OPENAI_API_KEY"]
|
|
332
393
|
raise "Error: OPENAI_API_KEY is not set" unless access_token
|
|
333
394
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
)
|
|
341
|
-
response.dig("data", 0, "embedding")
|
|
342
|
-
rescue StandardError => e
|
|
343
|
-
puts "Error: OpenAI API call failed."
|
|
344
|
-
pp e.message
|
|
345
|
-
pp e.backtrace
|
|
346
|
-
end
|
|
395
|
+
client = OpenAIClient.new(access_token: access_token)
|
|
396
|
+
response = client.embeddings(model: model, input: @text)
|
|
397
|
+
response.dig("data", 0, "embedding")
|
|
398
|
+
rescue OpenAIClient::APIError => e
|
|
399
|
+
puts "Error: OpenAI API call failed - #{e.message}"
|
|
400
|
+
nil
|
|
347
401
|
end
|
|
348
402
|
|
|
349
403
|
# Methods defined in Python but not wrapped in ruby-spacy can be called by this dynamic method handling mechanism.
|
|
@@ -351,7 +405,7 @@ module Spacy
|
|
|
351
405
|
@py_doc.send(name, *args)
|
|
352
406
|
end
|
|
353
407
|
|
|
354
|
-
def respond_to_missing?(sym)
|
|
408
|
+
def respond_to_missing?(sym, *args)
|
|
355
409
|
sym ? true : super
|
|
356
410
|
end
|
|
357
411
|
end
|
|
@@ -398,6 +452,18 @@ module Spacy
|
|
|
398
452
|
Matcher.new(@py_nlp)
|
|
399
453
|
end
|
|
400
454
|
|
|
455
|
+
# Generates a phrase matcher for the current language model.
|
|
456
|
+
# PhraseMatcher is more efficient than {Matcher} for matching large terminology lists.
|
|
457
|
+
# @param attr [String] the token attribute to match on (default: "ORTH").
|
|
458
|
+
# Use "LOWER" for case-insensitive matching.
|
|
459
|
+
# @return [PhraseMatcher]
|
|
460
|
+
# @example
|
|
461
|
+
# matcher = nlp.phrase_matcher(attr: "LOWER")
|
|
462
|
+
# matcher.add("PRODUCT", ["iPhone", "MacBook Pro"])
|
|
463
|
+
def phrase_matcher(attr: "ORTH")
|
|
464
|
+
PhraseMatcher.new(self, attr: attr)
|
|
465
|
+
end
|
|
466
|
+
|
|
401
467
|
# A utility method to lookup a vocabulary item of the given id.
|
|
402
468
|
# @param id [Integer] a vocabulary id
|
|
403
469
|
# @return [Object] a Python `Lexeme` object (https://spacy.io/api/lexeme)
|
|
@@ -473,7 +539,7 @@ module Spacy
|
|
|
473
539
|
@py_nlp.send(name, *args)
|
|
474
540
|
end
|
|
475
541
|
|
|
476
|
-
def respond_to_missing?(sym)
|
|
542
|
+
def respond_to_missing?(sym, *args)
|
|
477
543
|
sym ? true : super
|
|
478
544
|
end
|
|
479
545
|
end
|
|
@@ -516,6 +582,54 @@ module Spacy
|
|
|
516
582
|
end
|
|
517
583
|
end
|
|
518
584
|
|
|
585
|
+
# See also spaCy Python API document for [`PhraseMatcher`](https://spacy.io/api/phrasematcher).
|
|
586
|
+
# PhraseMatcher is useful for efficiently matching large terminology lists.
|
|
587
|
+
# It's faster than {Matcher} when matching many phrase patterns.
|
|
588
|
+
class PhraseMatcher
|
|
589
|
+
# @return [Object] a Python `PhraseMatcher` instance accessible via `PyCall`
|
|
590
|
+
attr_reader :py_matcher
|
|
591
|
+
|
|
592
|
+
# @return [Language] the language model used by this matcher
|
|
593
|
+
attr_reader :nlp
|
|
594
|
+
|
|
595
|
+
# Creates a {PhraseMatcher} instance.
|
|
596
|
+
# @param nlp [Language] an instance of {Language} class
|
|
597
|
+
# @param attr [String] the token attribute to match on (default: "ORTH").
|
|
598
|
+
# Use "LOWER" for case-insensitive matching.
|
|
599
|
+
# @example Case-insensitive matching
|
|
600
|
+
# matcher = Spacy::PhraseMatcher.new(nlp, attr: "LOWER")
|
|
601
|
+
def initialize(nlp, attr: "ORTH")
|
|
602
|
+
@nlp = nlp
|
|
603
|
+
@py_matcher = PyPhraseMatcher.call(nlp.py_nlp.vocab, attr: attr)
|
|
604
|
+
end
|
|
605
|
+
|
|
606
|
+
# Adds phrase patterns to the matcher.
|
|
607
|
+
# @param label [String] a label string given to the patterns
|
|
608
|
+
# @param phrases [Array<String>] an array of phrase strings to match
|
|
609
|
+
# @example Add product names
|
|
610
|
+
# matcher.add("PRODUCT", ["iPhone", "MacBook Pro", "iPad"])
|
|
611
|
+
def add(label, phrases)
|
|
612
|
+
patterns = phrases.map { |phrase| @nlp.py_nlp.call(phrase) }
|
|
613
|
+
@py_matcher.add(label, patterns)
|
|
614
|
+
end
|
|
615
|
+
|
|
616
|
+
# Execute the phrase match and return matching spans.
|
|
617
|
+
# @param doc [Doc] a {Doc} instance to search
|
|
618
|
+
# @return [Array<Span>] an array of {Span} objects with labels
|
|
619
|
+
# @example Find matches
|
|
620
|
+
# matches = matcher.match(doc)
|
|
621
|
+
# matches.each { |span| puts "#{span.text} => #{span.label}" }
|
|
622
|
+
def match(doc)
|
|
623
|
+
py_matches = @py_matcher.call(doc.py_doc, as_spans: true)
|
|
624
|
+
results = []
|
|
625
|
+
PyCall::List.call(py_matches).each do |py_span|
|
|
626
|
+
span = Span.new(doc, py_span: py_span)
|
|
627
|
+
results << span
|
|
628
|
+
end
|
|
629
|
+
results
|
|
630
|
+
end
|
|
631
|
+
end
|
|
632
|
+
|
|
519
633
|
# See also spaCy Python API document for [`Span`](https://spacy.io/api/span).
|
|
520
634
|
class Span
|
|
521
635
|
# @return [Object] a Python `Span` instance accessible via `PyCall`
|
|
@@ -679,7 +793,7 @@ module Spacy
|
|
|
679
793
|
@py_span.send(name, *args)
|
|
680
794
|
end
|
|
681
795
|
|
|
682
|
-
def respond_to_missing?(sym)
|
|
796
|
+
def respond_to_missing?(sym, *args)
|
|
683
797
|
sym ? true : super
|
|
684
798
|
end
|
|
685
799
|
end
|
|
@@ -845,7 +959,7 @@ module Spacy
|
|
|
845
959
|
@py_token.send(name, *args)
|
|
846
960
|
end
|
|
847
961
|
|
|
848
|
-
def respond_to_missing?(sym)
|
|
962
|
+
def respond_to_missing?(sym, *args)
|
|
849
963
|
sym ? true : super
|
|
850
964
|
end
|
|
851
965
|
end
|
|
@@ -920,7 +1034,7 @@ module Spacy
|
|
|
920
1034
|
@py_lexeme.send(name, *args)
|
|
921
1035
|
end
|
|
922
1036
|
|
|
923
|
-
def respond_to_missing?(sym)
|
|
1037
|
+
def respond_to_missing?(sym, *args)
|
|
924
1038
|
sym ? true : super
|
|
925
1039
|
end
|
|
926
1040
|
end
|
data/ruby-spacy.gemspec
CHANGED
|
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
|
|
|
15
15
|
|
|
16
16
|
spec.homepage = "https://github.com/yohasebe/ruby-spacy"
|
|
17
17
|
spec.license = "MIT"
|
|
18
|
-
spec.required_ruby_version = Gem::Requirement.new(">=
|
|
18
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 3.1")
|
|
19
19
|
|
|
20
20
|
# Specify which files should be added to the gem when it is released.
|
|
21
21
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
|
@@ -31,9 +31,9 @@ Gem::Specification.new do |spec|
|
|
|
31
31
|
spec.add_development_dependency "rspec"
|
|
32
32
|
spec.add_development_dependency "solargraph"
|
|
33
33
|
|
|
34
|
+
spec.add_dependency "fiddle" # Required for Ruby 4.0+ (moved from default to bundled gem)
|
|
34
35
|
spec.add_dependency "numpy", "~> 0.4.0"
|
|
35
36
|
spec.add_dependency "pycall", "~> 1.5.1"
|
|
36
|
-
spec.add_dependency "ruby-openai"
|
|
37
37
|
spec.add_dependency "terminal-table", "~> 3.0.1"
|
|
38
38
|
|
|
39
39
|
# For more information and examples about making a new gem, checkout our
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby-spacy
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.3
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Yoichiro Hasebe
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: bundler
|
|
@@ -67,47 +66,47 @@ dependencies:
|
|
|
67
66
|
- !ruby/object:Gem::Version
|
|
68
67
|
version: '0'
|
|
69
68
|
- !ruby/object:Gem::Dependency
|
|
70
|
-
name:
|
|
69
|
+
name: fiddle
|
|
71
70
|
requirement: !ruby/object:Gem::Requirement
|
|
72
71
|
requirements:
|
|
73
|
-
- - "
|
|
72
|
+
- - ">="
|
|
74
73
|
- !ruby/object:Gem::Version
|
|
75
|
-
version: 0
|
|
74
|
+
version: '0'
|
|
76
75
|
type: :runtime
|
|
77
76
|
prerelease: false
|
|
78
77
|
version_requirements: !ruby/object:Gem::Requirement
|
|
79
78
|
requirements:
|
|
80
|
-
- - "
|
|
79
|
+
- - ">="
|
|
81
80
|
- !ruby/object:Gem::Version
|
|
82
|
-
version: 0
|
|
81
|
+
version: '0'
|
|
83
82
|
- !ruby/object:Gem::Dependency
|
|
84
|
-
name:
|
|
83
|
+
name: numpy
|
|
85
84
|
requirement: !ruby/object:Gem::Requirement
|
|
86
85
|
requirements:
|
|
87
86
|
- - "~>"
|
|
88
87
|
- !ruby/object:Gem::Version
|
|
89
|
-
version:
|
|
88
|
+
version: 0.4.0
|
|
90
89
|
type: :runtime
|
|
91
90
|
prerelease: false
|
|
92
91
|
version_requirements: !ruby/object:Gem::Requirement
|
|
93
92
|
requirements:
|
|
94
93
|
- - "~>"
|
|
95
94
|
- !ruby/object:Gem::Version
|
|
96
|
-
version:
|
|
95
|
+
version: 0.4.0
|
|
97
96
|
- !ruby/object:Gem::Dependency
|
|
98
|
-
name:
|
|
97
|
+
name: pycall
|
|
99
98
|
requirement: !ruby/object:Gem::Requirement
|
|
100
99
|
requirements:
|
|
101
|
-
- - "
|
|
100
|
+
- - "~>"
|
|
102
101
|
- !ruby/object:Gem::Version
|
|
103
|
-
version:
|
|
102
|
+
version: 1.5.1
|
|
104
103
|
type: :runtime
|
|
105
104
|
prerelease: false
|
|
106
105
|
version_requirements: !ruby/object:Gem::Requirement
|
|
107
106
|
requirements:
|
|
108
|
-
- - "
|
|
107
|
+
- - "~>"
|
|
109
108
|
- !ruby/object:Gem::Version
|
|
110
|
-
version:
|
|
109
|
+
version: 1.5.1
|
|
111
110
|
- !ruby/object:Gem::Dependency
|
|
112
111
|
name: terminal-table
|
|
113
112
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -203,13 +202,13 @@ files:
|
|
|
203
202
|
- examples/rule_based_matching/creating_spans_from_matches.rb
|
|
204
203
|
- examples/rule_based_matching/matcher.rb
|
|
205
204
|
- lib/ruby-spacy.rb
|
|
205
|
+
- lib/ruby-spacy/openai_client.rb
|
|
206
206
|
- lib/ruby-spacy/version.rb
|
|
207
207
|
- ruby-spacy.gemspec
|
|
208
208
|
homepage: https://github.com/yohasebe/ruby-spacy
|
|
209
209
|
licenses:
|
|
210
210
|
- MIT
|
|
211
211
|
metadata: {}
|
|
212
|
-
post_install_message:
|
|
213
212
|
rdoc_options: []
|
|
214
213
|
require_paths:
|
|
215
214
|
- lib
|
|
@@ -217,15 +216,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
217
216
|
requirements:
|
|
218
217
|
- - ">="
|
|
219
218
|
- !ruby/object:Gem::Version
|
|
220
|
-
version: '
|
|
219
|
+
version: '3.1'
|
|
221
220
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
222
221
|
requirements:
|
|
223
222
|
- - ">="
|
|
224
223
|
- !ruby/object:Gem::Version
|
|
225
224
|
version: '0'
|
|
226
225
|
requirements: []
|
|
227
|
-
rubygems_version: 3.
|
|
228
|
-
signing_key:
|
|
226
|
+
rubygems_version: 3.6.9
|
|
229
227
|
specification_version: 4
|
|
230
228
|
summary: A wrapper module for using spaCy natural language processing library from
|
|
231
229
|
the Ruby programming language using PyCall
|