transformers-rb 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +203 -0
  4. data/README.md +163 -0
  5. data/lib/transformers/activations.rb +57 -0
  6. data/lib/transformers/configuration_utils.rb +285 -0
  7. data/lib/transformers/convert_slow_tokenizer.rb +90 -0
  8. data/lib/transformers/data/processors/squad.rb +115 -0
  9. data/lib/transformers/dynamic_module_utils.rb +25 -0
  10. data/lib/transformers/feature_extraction_utils.rb +110 -0
  11. data/lib/transformers/hf_hub/constants.rb +71 -0
  12. data/lib/transformers/hf_hub/errors.rb +11 -0
  13. data/lib/transformers/hf_hub/file_download.rb +764 -0
  14. data/lib/transformers/hf_hub/utils/_errors.rb +94 -0
  15. data/lib/transformers/hf_hub/utils/_headers.rb +109 -0
  16. data/lib/transformers/image_processing_base.rb +169 -0
  17. data/lib/transformers/image_processing_utils.rb +63 -0
  18. data/lib/transformers/image_transforms.rb +208 -0
  19. data/lib/transformers/image_utils.rb +165 -0
  20. data/lib/transformers/modeling_outputs.rb +81 -0
  21. data/lib/transformers/modeling_utils.rb +888 -0
  22. data/lib/transformers/models/auto/auto_factory.rb +138 -0
  23. data/lib/transformers/models/auto/configuration_auto.rb +61 -0
  24. data/lib/transformers/models/auto/feature_extraction_auto.rb +20 -0
  25. data/lib/transformers/models/auto/image_processing_auto.rb +104 -0
  26. data/lib/transformers/models/auto/modeling_auto.rb +80 -0
  27. data/lib/transformers/models/auto/tokenization_auto.rb +160 -0
  28. data/lib/transformers/models/bert/configuration_bert.rb +65 -0
  29. data/lib/transformers/models/bert/modeling_bert.rb +836 -0
  30. data/lib/transformers/models/bert/tokenization_bert.rb +115 -0
  31. data/lib/transformers/models/bert/tokenization_bert_fast.rb +52 -0
  32. data/lib/transformers/models/distilbert/configuration_distilbert.rb +63 -0
  33. data/lib/transformers/models/distilbert/modeling_distilbert.rb +616 -0
  34. data/lib/transformers/models/distilbert/tokenization_distilbert.rb +114 -0
  35. data/lib/transformers/models/distilbert/tokenization_distilbert_fast.rb +71 -0
  36. data/lib/transformers/models/vit/configuration_vit.rb +60 -0
  37. data/lib/transformers/models/vit/image_processing_vit.rb +170 -0
  38. data/lib/transformers/models/vit/modeling_vit.rb +506 -0
  39. data/lib/transformers/pipelines/_init.rb +348 -0
  40. data/lib/transformers/pipelines/base.rb +301 -0
  41. data/lib/transformers/pipelines/feature_extraction.rb +47 -0
  42. data/lib/transformers/pipelines/image_classification.rb +110 -0
  43. data/lib/transformers/pipelines/image_feature_extraction.rb +56 -0
  44. data/lib/transformers/pipelines/pt_utils.rb +53 -0
  45. data/lib/transformers/pipelines/question_answering.rb +508 -0
  46. data/lib/transformers/pipelines/text_classification.rb +123 -0
  47. data/lib/transformers/pipelines/token_classification.rb +282 -0
  48. data/lib/transformers/ruby_utils.rb +33 -0
  49. data/lib/transformers/sentence_transformer.rb +37 -0
  50. data/lib/transformers/tokenization_utils.rb +152 -0
  51. data/lib/transformers/tokenization_utils_base.rb +937 -0
  52. data/lib/transformers/tokenization_utils_fast.rb +386 -0
  53. data/lib/transformers/torch_utils.rb +25 -0
  54. data/lib/transformers/utils/_init.rb +31 -0
  55. data/lib/transformers/utils/generic.rb +107 -0
  56. data/lib/transformers/utils/hub.rb +209 -0
  57. data/lib/transformers/utils/import_utils.rb +45 -0
  58. data/lib/transformers/utils/logging.rb +52 -0
  59. data/lib/transformers/version.rb +3 -0
  60. data/lib/transformers-rb.rb +1 -0
  61. data/lib/transformers.rb +100 -0
  62. data/licenses/LICENSE-huggingface-hub.txt +201 -0
  63. data/licenses/LICENSE-sentence-transformers.txt +201 -0
  64. data/licenses/NOTICE-sentence-transformers.txt +5 -0
  65. metadata +161 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: c8f34c5454e2a1ac18bbb9a4b290a43e994cd3984fa2b4125ff4af969b9d17ed
4
+ data.tar.gz: 57c876fd1a4e62089fdc7bcbfcb9c155050166458a679991036894a6721ac168
5
+ SHA512:
6
+ metadata.gz: 7458b1ba0303e0741abf16a63efc350b6cad5e5dff48c46dcba6f62858f562bbf83478eb918995c45f5882159cf5c22d696cc4b0360f813312c2263da7c28205
7
+ data.tar.gz: a4d98b210a22d23bc55f452a93dd0e9df20c81cbc0d537d29eaa1069b157eec1df6cdc76fc7f398947e03ae841166d609bff0d250506eb0dd0745ef0fdf86efd
data/CHANGELOG.md ADDED
@@ -0,0 +1,3 @@
1
+ ## 0.1.0 (2024-08-19)
2
+
3
+ - First release
data/LICENSE.txt ADDED
@@ -0,0 +1,203 @@
1
+ Copyright 2018- The Hugging Face team. All rights reserved.
2
+
3
+ Apache License
4
+ Version 2.0, January 2004
5
+ http://www.apache.org/licenses/
6
+
7
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
+
9
+ 1. Definitions.
10
+
11
+ "License" shall mean the terms and conditions for use, reproduction,
12
+ and distribution as defined by Sections 1 through 9 of this document.
13
+
14
+ "Licensor" shall mean the copyright owner or entity authorized by
15
+ the copyright owner that is granting the License.
16
+
17
+ "Legal Entity" shall mean the union of the acting entity and all
18
+ other entities that control, are controlled by, or are under common
19
+ control with that entity. For the purposes of this definition,
20
+ "control" means (i) the power, direct or indirect, to cause the
21
+ direction or management of such entity, whether by contract or
22
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
23
+ outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ "You" (or "Your") shall mean an individual or Legal Entity
26
+ exercising permissions granted by this License.
27
+
28
+ "Source" form shall mean the preferred form for making modifications,
29
+ including but not limited to software source code, documentation
30
+ source, and configuration files.
31
+
32
+ "Object" form shall mean any form resulting from mechanical
33
+ transformation or translation of a Source form, including but
34
+ not limited to compiled object code, generated documentation,
35
+ and conversions to other media types.
36
+
37
+ "Work" shall mean the work of authorship, whether in Source or
38
+ Object form, made available under the License, as indicated by a
39
+ copyright notice that is included in or attached to the work
40
+ (an example is provided in the Appendix below).
41
+
42
+ "Derivative Works" shall mean any work, whether in Source or Object
43
+ form, that is based on (or derived from) the Work and for which the
44
+ editorial revisions, annotations, elaborations, or other modifications
45
+ represent, as a whole, an original work of authorship. For the purposes
46
+ of this License, Derivative Works shall not include works that remain
47
+ separable from, or merely link (or bind by name) to the interfaces of,
48
+ the Work and Derivative Works thereof.
49
+
50
+ "Contribution" shall mean any work of authorship, including
51
+ the original version of the Work and any modifications or additions
52
+ to that Work or Derivative Works thereof, that is intentionally
53
+ submitted to Licensor for inclusion in the Work by the copyright owner
54
+ or by an individual or Legal Entity authorized to submit on behalf of
55
+ the copyright owner. For the purposes of this definition, "submitted"
56
+ means any form of electronic, verbal, or written communication sent
57
+ to the Licensor or its representatives, including but not limited to
58
+ communication on electronic mailing lists, source code control systems,
59
+ and issue tracking systems that are managed by, or on behalf of, the
60
+ Licensor for the purpose of discussing and improving the Work, but
61
+ excluding communication that is conspicuously marked or otherwise
62
+ designated in writing by the copyright owner as "Not a Contribution."
63
+
64
+ "Contributor" shall mean Licensor and any individual or Legal Entity
65
+ on behalf of whom a Contribution has been received by Licensor and
66
+ subsequently incorporated within the Work.
67
+
68
+ 2. Grant of Copyright License. Subject to the terms and conditions of
69
+ this License, each Contributor hereby grants to You a perpetual,
70
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71
+ copyright license to reproduce, prepare Derivative Works of,
72
+ publicly display, publicly perform, sublicense, and distribute the
73
+ Work and such Derivative Works in Source or Object form.
74
+
75
+ 3. Grant of Patent License. Subject to the terms and conditions of
76
+ this License, each Contributor hereby grants to You a perpetual,
77
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78
+ (except as stated in this section) patent license to make, have made,
79
+ use, offer to sell, sell, import, and otherwise transfer the Work,
80
+ where such license applies only to those patent claims licensable
81
+ by such Contributor that are necessarily infringed by their
82
+ Contribution(s) alone or by combination of their Contribution(s)
83
+ with the Work to which such Contribution(s) was submitted. If You
84
+ institute patent litigation against any entity (including a
85
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
86
+ or a Contribution incorporated within the Work constitutes direct
87
+ or contributory patent infringement, then any patent licenses
88
+ granted to You under this License for that Work shall terminate
89
+ as of the date such litigation is filed.
90
+
91
+ 4. Redistribution. You may reproduce and distribute copies of the
92
+ Work or Derivative Works thereof in any medium, with or without
93
+ modifications, and in Source or Object form, provided that You
94
+ meet the following conditions:
95
+
96
+ (a) You must give any other recipients of the Work or
97
+ Derivative Works a copy of this License; and
98
+
99
+ (b) You must cause any modified files to carry prominent notices
100
+ stating that You changed the files; and
101
+
102
+ (c) You must retain, in the Source form of any Derivative Works
103
+ that You distribute, all copyright, patent, trademark, and
104
+ attribution notices from the Source form of the Work,
105
+ excluding those notices that do not pertain to any part of
106
+ the Derivative Works; and
107
+
108
+ (d) If the Work includes a "NOTICE" text file as part of its
109
+ distribution, then any Derivative Works that You distribute must
110
+ include a readable copy of the attribution notices contained
111
+ within such NOTICE file, excluding those notices that do not
112
+ pertain to any part of the Derivative Works, in at least one
113
+ of the following places: within a NOTICE text file distributed
114
+ as part of the Derivative Works; within the Source form or
115
+ documentation, if provided along with the Derivative Works; or,
116
+ within a display generated by the Derivative Works, if and
117
+ wherever such third-party notices normally appear. The contents
118
+ of the NOTICE file are for informational purposes only and
119
+ do not modify the License. You may add Your own attribution
120
+ notices within Derivative Works that You distribute, alongside
121
+ or as an addendum to the NOTICE text from the Work, provided
122
+ that such additional attribution notices cannot be construed
123
+ as modifying the License.
124
+
125
+ You may add Your own copyright statement to Your modifications and
126
+ may provide additional or different license terms and conditions
127
+ for use, reproduction, or distribution of Your modifications, or
128
+ for any such Derivative Works as a whole, provided Your use,
129
+ reproduction, and distribution of the Work otherwise complies with
130
+ the conditions stated in this License.
131
+
132
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
133
+ any Contribution intentionally submitted for inclusion in the Work
134
+ by You to the Licensor shall be under the terms and conditions of
135
+ this License, without any additional terms or conditions.
136
+ Notwithstanding the above, nothing herein shall supersede or modify
137
+ the terms of any separate license agreement you may have executed
138
+ with Licensor regarding such Contributions.
139
+
140
+ 6. Trademarks. This License does not grant permission to use the trade
141
+ names, trademarks, service marks, or product names of the Licensor,
142
+ except as required for reasonable and customary use in describing the
143
+ origin of the Work and reproducing the content of the NOTICE file.
144
+
145
+ 7. Disclaimer of Warranty. Unless required by applicable law or
146
+ agreed to in writing, Licensor provides the Work (and each
147
+ Contributor provides its Contributions) on an "AS IS" BASIS,
148
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149
+ implied, including, without limitation, any warranties or conditions
150
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151
+ PARTICULAR PURPOSE. You are solely responsible for determining the
152
+ appropriateness of using or redistributing the Work and assume any
153
+ risks associated with Your exercise of permissions under this License.
154
+
155
+ 8. Limitation of Liability. In no event and under no legal theory,
156
+ whether in tort (including negligence), contract, or otherwise,
157
+ unless required by applicable law (such as deliberate and grossly
158
+ negligent acts) or agreed to in writing, shall any Contributor be
159
+ liable to You for damages, including any direct, indirect, special,
160
+ incidental, or consequential damages of any character arising as a
161
+ result of this License or out of the use or inability to use the
162
+ Work (including but not limited to damages for loss of goodwill,
163
+ work stoppage, computer failure or malfunction, or any and all
164
+ other commercial damages or losses), even if such Contributor
165
+ has been advised of the possibility of such damages.
166
+
167
+ 9. Accepting Warranty or Additional Liability. While redistributing
168
+ the Work or Derivative Works thereof, You may choose to offer,
169
+ and charge a fee for, acceptance of support, warranty, indemnity,
170
+ or other liability obligations and/or rights consistent with this
171
+ License. However, in accepting such obligations, You may act only
172
+ on Your own behalf and on Your sole responsibility, not on behalf
173
+ of any other Contributor, and only if You agree to indemnify,
174
+ defend, and hold each Contributor harmless for any liability
175
+ incurred by, or claims asserted against, such Contributor by reason
176
+ of your accepting any such warranty or additional liability.
177
+
178
+ END OF TERMS AND CONDITIONS
179
+
180
+ APPENDIX: How to apply the Apache License to your work.
181
+
182
+ To apply the Apache License to your work, attach the following
183
+ boilerplate notice, with the fields enclosed by brackets "[]"
184
+ replaced with your own identifying information. (Don't include
185
+ the brackets!) The text should be enclosed in the appropriate
186
+ comment syntax for the file format. We also recommend that a
187
+ file or class name and description of purpose be included on the
188
+ same "printed page" as the copyright notice for easier
189
+ identification within third-party archives.
190
+
191
+ Copyright [yyyy] [name of copyright owner]
192
+
193
+ Licensed under the Apache License, Version 2.0 (the "License");
194
+ you may not use this file except in compliance with the License.
195
+ You may obtain a copy of the License at
196
+
197
+ http://www.apache.org/licenses/LICENSE-2.0
198
+
199
+ Unless required by applicable law or agreed to in writing, software
200
+ distributed under the License is distributed on an "AS IS" BASIS,
201
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202
+ See the License for the specific language governing permissions and
203
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,163 @@
1
+ # Transformers.rb
2
+
3
+ :slightly_smiling_face: State-of-the-art [transformers](https://github.com/huggingface/transformers) for Ruby
4
+
5
+ [![Build Status](https://github.com/ankane/transformers-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/transformers-ruby/actions)
6
+
7
+ ## Installation
8
+
9
+ First, [install Torch.rb](https://github.com/ankane/torch.rb#installation).
10
+
11
+ Then add this line to your application’s Gemfile:
12
+
13
+ ```ruby
14
+ gem "transformers-rb"
15
+ ```
16
+
17
+ ## Getting Started
18
+
19
+ - [Models](#models)
20
+ - [Pipelines](#pipelines)
21
+
22
+ ## Models
23
+
24
+ ### sentence-transformers/all-MiniLM-L6-v2
25
+
26
+ [Docs](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)
27
+
28
+ ```ruby
29
+ sentences = ["This is an example sentence", "Each sentence is converted"]
30
+
31
+ model = Transformers::SentenceTransformer.new("sentence-transformers/all-MiniLM-L6-v2")
32
+ embeddings = model.encode(sentences)
33
+ ```
34
+
35
+ ### sentence-transformers/multi-qa-MiniLM-L6-cos-v1
36
+
37
+ [Docs](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1)
38
+
39
+ ```ruby
40
+ query = "How many people live in London?"
41
+ docs = ["Around 9 Million people live in London", "London is known for its financial district"]
42
+
43
+ model = Transformers::SentenceTransformer.new("sentence-transformers/multi-qa-MiniLM-L6-cos-v1")
44
+ query_emb = model.encode(query)
45
+ doc_emb = model.encode(docs)
46
+ scores = Torch.mm(Torch.tensor([query_emb]), Torch.tensor(doc_emb).transpose(0, 1))[0].cpu.to_a
47
+ doc_score_pairs = docs.zip(scores).sort_by { |d, s| -s }
48
+ ```
49
+
50
+ ### mixedbread-ai/mxbai-embed-large-v1
51
+
52
+ [Docs](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1)
53
+
54
+ ```ruby
55
+ def transform_query(query)
56
+ "Represent this sentence for searching relevant passages: #{query}"
57
+ end
58
+
59
+ docs = [
60
+ transform_query("puppy"),
61
+ "The dog is barking",
62
+ "The cat is purring"
63
+ ]
64
+
65
+ model = Transformers::SentenceTransformer.new("mixedbread-ai/mxbai-embed-large-v1")
66
+ embeddings = model.encode(docs)
67
+ ```
68
+
69
+ ### opensearch-project/opensearch-neural-sparse-encoding-v1
70
+
71
+ [Docs](https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-v1)
72
+
73
+ ```ruby
74
+ docs = ["The dog is barking", "The cat is purring", "The bear is growling"]
75
+
76
+ model_id = "opensearch-project/opensearch-neural-sparse-encoding-v1"
77
+ model = Transformers::AutoModelForMaskedLM.from_pretrained(model_id)
78
+ tokenizer = Transformers::AutoTokenizer.from_pretrained(model_id)
79
+ special_token_ids = tokenizer.special_tokens_map.map { |_, token| tokenizer.vocab[token] }
80
+
81
+ feature = tokenizer.(docs, padding: true, truncation: true, return_tensors: "pt", return_token_type_ids: false)
82
+ output = model.(**feature)[0]
83
+
84
+ values, _ = Torch.max(output * feature[:attention_mask].unsqueeze(-1), dim: 1)
85
+ values = Torch.log(1 + Torch.relu(values))
86
+ values[0.., special_token_ids] = 0
87
+ embeddings = values.to_a
88
+ ```
89
+
90
+ ## Pipelines
91
+
92
+ Named-entity recognition
93
+
94
+ ```ruby
95
+ ner = Transformers.pipeline("ner")
96
+ ner.("Ruby is a programming language created by Matz")
97
+ ```
98
+
99
+ Sentiment analysis
100
+
101
+ ```ruby
102
+ classifier = Transformers.pipeline("sentiment-analysis")
103
+ classifier.("We are very happy to show you the 🤗 Transformers library.")
104
+ ```
105
+
106
+ Question answering
107
+
108
+ ```ruby
109
+ qa = Transformers.pipeline("question-answering")
110
+ qa.(question: "Who invented Ruby?", context: "Ruby is a programming language created by Matz")
111
+ ```
112
+
113
+ Feature extraction
114
+
115
+ ```ruby
116
+ extractor = Transformers.pipeline("feature-extraction")
117
+ extractor.("We are very happy to show you the 🤗 Transformers library.")
118
+ ```
119
+
120
+ Image classification
121
+
122
+ ```ruby
123
+ classifier = Transformers.pipeline("image-classification")
124
+ classifier.(URI("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"))
125
+ ```
126
+
127
+ Image feature extraction
128
+
129
+ ```ruby
130
+ extractor = Transformers.pipeline("image-feature-extraction")
131
+ extractor.(URI("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"))
132
+ ```
133
+
134
+ ## API
135
+
136
+ This library follows the [Transformers Python API](https://huggingface.co/docs/transformers/index). Only a few model architectures are currently supported:
137
+
138
+ - BERT
139
+ - DistilBERT
140
+ - ViT
141
+
142
+ ## History
143
+
144
+ View the [changelog](https://github.com/ankane/transformers-ruby/blob/master/CHANGELOG.md)
145
+
146
+ ## Contributing
147
+
148
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
149
+
150
+ - [Report bugs](https://github.com/ankane/transformers-ruby/issues)
151
+ - Fix bugs and [submit pull requests](https://github.com/ankane/transformers-ruby/pulls)
152
+ - Write, clarify, or fix documentation
153
+ - Suggest or add new features
154
+
155
+ To get started with development:
156
+
157
+ ```sh
158
+ git clone https://github.com/ankane/transformers-ruby.git
159
+ cd transformers-ruby
160
+ bundle install
161
+ bundle exec rake download:files
162
+ bundle exec rake test
163
+ ```
@@ -0,0 +1,57 @@
1
+ # Copyright 2020 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
module Transformers
  # GELU activation function (Hendrycks & Gimpel, https://arxiv.org/abs/1606.08415).
  #
  # By default this delegates to Torch's native GELU implementation; pass
  # use_gelu_python: true to use the explicit erf-based formulation instead
  # (kept for parity with the Python Transformers library).
  class GELUActivation < Torch::NN::Module
    def initialize(use_gelu_python: false)
      super()
      if use_gelu_python
        # Fix: previously this read `@act = _gelu_python`, which *calls* the
        # method immediately with no arguments (ArgumentError, since it
        # requires `input`). Capture the Method object instead, matching the
        # `Torch::NN::Functional.method(:gelu)` branch and the `@act.(input)`
        # invocation in #forward.
        @act = method(:_gelu_python)
      else
        @act = Torch::NN::Functional.method(:gelu)
      end
    end

    # erf-based GELU: x * 0.5 * (1 + erf(x / sqrt(2))).
    def _gelu_python(input)
      input * 0.5 * (1.0 + Torch.erf(input / Math.sqrt(2.0)))
    end

    def forward(input)
      @act.(input)
    end
  end

  # Instantiates an activation class lazily on lookup. Each value in the
  # backing hash is either a class or an [class, kwargs] pair.
  class ClassInstantier
    def initialize(data)
      @data = data
    end

    # Returns a fresh instance for +key+; raises KeyError for unknown keys
    # (via Hash#fetch).
    def [](key)
      content = @data.fetch(key)
      cls, kwargs = content.is_a?(Array) ? content : [content, {}]
      cls.new(**kwargs)
    end
  end

  # Mapping of activation-name strings to their implementing classes.
  ACT2CLS = {
    "gelu" => GELUActivation
  }
  ACT2FN = ClassInstantier.new(ACT2CLS)

  module Activations
    # Returns a new activation module for +activation_string+ (e.g. "gelu");
    # raises KeyError for unsupported names.
    def self.get_activation(activation_string)
      ACT2FN[activation_string]
    end
  end
end