transformers-rb 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +203 -0
- data/README.md +163 -0
- data/lib/transformers/activations.rb +57 -0
- data/lib/transformers/configuration_utils.rb +285 -0
- data/lib/transformers/convert_slow_tokenizer.rb +90 -0
- data/lib/transformers/data/processors/squad.rb +115 -0
- data/lib/transformers/dynamic_module_utils.rb +25 -0
- data/lib/transformers/feature_extraction_utils.rb +110 -0
- data/lib/transformers/hf_hub/constants.rb +71 -0
- data/lib/transformers/hf_hub/errors.rb +11 -0
- data/lib/transformers/hf_hub/file_download.rb +764 -0
- data/lib/transformers/hf_hub/utils/_errors.rb +94 -0
- data/lib/transformers/hf_hub/utils/_headers.rb +109 -0
- data/lib/transformers/image_processing_base.rb +169 -0
- data/lib/transformers/image_processing_utils.rb +63 -0
- data/lib/transformers/image_transforms.rb +208 -0
- data/lib/transformers/image_utils.rb +165 -0
- data/lib/transformers/modeling_outputs.rb +81 -0
- data/lib/transformers/modeling_utils.rb +888 -0
- data/lib/transformers/models/auto/auto_factory.rb +138 -0
- data/lib/transformers/models/auto/configuration_auto.rb +61 -0
- data/lib/transformers/models/auto/feature_extraction_auto.rb +20 -0
- data/lib/transformers/models/auto/image_processing_auto.rb +104 -0
- data/lib/transformers/models/auto/modeling_auto.rb +80 -0
- data/lib/transformers/models/auto/tokenization_auto.rb +160 -0
- data/lib/transformers/models/bert/configuration_bert.rb +65 -0
- data/lib/transformers/models/bert/modeling_bert.rb +836 -0
- data/lib/transformers/models/bert/tokenization_bert.rb +115 -0
- data/lib/transformers/models/bert/tokenization_bert_fast.rb +52 -0
- data/lib/transformers/models/distilbert/configuration_distilbert.rb +63 -0
- data/lib/transformers/models/distilbert/modeling_distilbert.rb +616 -0
- data/lib/transformers/models/distilbert/tokenization_distilbert.rb +114 -0
- data/lib/transformers/models/distilbert/tokenization_distilbert_fast.rb +71 -0
- data/lib/transformers/models/vit/configuration_vit.rb +60 -0
- data/lib/transformers/models/vit/image_processing_vit.rb +170 -0
- data/lib/transformers/models/vit/modeling_vit.rb +506 -0
- data/lib/transformers/pipelines/_init.rb +348 -0
- data/lib/transformers/pipelines/base.rb +301 -0
- data/lib/transformers/pipelines/feature_extraction.rb +47 -0
- data/lib/transformers/pipelines/image_classification.rb +110 -0
- data/lib/transformers/pipelines/image_feature_extraction.rb +56 -0
- data/lib/transformers/pipelines/pt_utils.rb +53 -0
- data/lib/transformers/pipelines/question_answering.rb +508 -0
- data/lib/transformers/pipelines/text_classification.rb +123 -0
- data/lib/transformers/pipelines/token_classification.rb +282 -0
- data/lib/transformers/ruby_utils.rb +33 -0
- data/lib/transformers/sentence_transformer.rb +37 -0
- data/lib/transformers/tokenization_utils.rb +152 -0
- data/lib/transformers/tokenization_utils_base.rb +937 -0
- data/lib/transformers/tokenization_utils_fast.rb +386 -0
- data/lib/transformers/torch_utils.rb +25 -0
- data/lib/transformers/utils/_init.rb +31 -0
- data/lib/transformers/utils/generic.rb +107 -0
- data/lib/transformers/utils/hub.rb +209 -0
- data/lib/transformers/utils/import_utils.rb +45 -0
- data/lib/transformers/utils/logging.rb +52 -0
- data/lib/transformers/version.rb +3 -0
- data/lib/transformers-rb.rb +1 -0
- data/lib/transformers.rb +100 -0
- data/licenses/LICENSE-huggingface-hub.txt +201 -0
- data/licenses/LICENSE-sentence-transformers.txt +201 -0
- data/licenses/NOTICE-sentence-transformers.txt +5 -0
- metadata +161 -0
@@ -0,0 +1,110 @@
|
|
1
|
+
module Transformers
|
2
|
+
# Enumerates the scoring functions a classification pipeline may apply to
# model logits before ranking labels. Inherits from ExplicitEnum (defined
# elsewhere in this gem) — presumably to validate values constructed from
# user-supplied strings; confirm against ExplicitEnum's implementation.
class ClassificationFunction < ExplicitEnum
  SIGMOID = "sigmoid" # element-wise sigmoid (multi-label / single-logit models)
  SOFTMAX = "softmax" # softmax over the label axis (single-label, multi-class)
  NONE = "none"       # leave logits unchanged
end
|
7
|
+
|
8
|
+
# Image classification pipeline: loads an image, runs it through an
# image-classification model, and returns label/score hashes sorted by
# descending score. Image loading is delegated to ImageUtils.load_image,
# which defines the accepted input forms (path/URL/object — confirm there).
class ImageClassificationPipeline < Pipeline
  extend ClassAttribute

  # Default scoring function; the effective function is resolved per call in
  # #postprocess from the model config when none is supplied.
  class_attribute :function_to_apply, ClassificationFunction::NONE

  def initialize(*args, **kwargs)
    super(*args, **kwargs)
    Utils.requires_backends(self, "vision")
    check_model_type(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES)
  end

  # Splits caller kwargs into the [preprocess, forward, postprocess] parameter
  # hashes expected by the Pipeline machinery.
  def _sanitize_parameters(top_k: nil, function_to_apply: nil, timeout: nil)
    preprocess_params = {}
    if !timeout.nil?
      preprocess_params[:timeout] = timeout
    end
    postprocess_params = {}
    if !top_k.nil?
      postprocess_params[:top_k] = top_k
    end
    if function_to_apply.is_a?(String)
      # Round-trip through the enum so unrecognized strings are rejected early.
      function_to_apply = ClassificationFunction.new(function_to_apply.downcase).to_s
    end
    if !function_to_apply.nil?
      postprocess_params[:function_to_apply] = function_to_apply
    end
    [preprocess_params, {}, postprocess_params]
  end

  # Loads the image and converts it to model-ready tensors via the image
  # processor, using the pipeline's framework for tensor format.
  def preprocess(image, timeout: nil)
    image = ImageUtils.load_image(image, timeout: timeout)
    model_inputs = @image_processor.(image, return_tensors: @framework)
    if @framework == "pt"
      # TODO
      # model_inputs = model_inputs.to(torch_dtype)
    end
    model_inputs
  end

  def _forward(model_inputs)
    @model.(**model_inputs.to_h)
  end

  # Converts raw model logits into up to +top_k+ {label:, score:} hashes,
  # sorted by descending score.
  def postprocess(model_outputs, function_to_apply: nil, top_k: 5)
    if function_to_apply.nil?
      if @model.config.problem_type == "multi_label_classification" || @model.config.num_labels == 1
        function_to_apply = ClassificationFunction::SIGMOID
      elsif @model.config.problem_type == "single_label_classification" || @model.config.num_labels > 1
        function_to_apply = ClassificationFunction::SOFTMAX
      elsif @model.config.instance_variable_defined?(:@function_to_apply)
        # BUGFIX: dropped the redundant `&& function_to_apply.nil?` guard —
        # this chain only runs when function_to_apply is already nil.
        function_to_apply = @model.config.function_to_apply
      else
        function_to_apply = ClassificationFunction::NONE
      end
    end

    # Never report more entries than the model has labels.
    if top_k > @model.config.num_labels
      top_k = @model.config.num_labels
    end

    outputs = model_outputs[:logits][0]
    if @framework == "pt" && [Torch.bfloat16, Torch.float16].include?(outputs.dtype)
      # Numo has no half-precision dtype; upcast before leaving Torch.
      outputs = outputs.to(Torch.float32).numo
    else
      outputs = outputs.numo
    end

    if function_to_apply == ClassificationFunction::SIGMOID
      scores = sigmoid(outputs)
    elsif function_to_apply == ClassificationFunction::SOFTMAX
      scores = softmax(outputs)
    elsif function_to_apply == ClassificationFunction::NONE
      scores = outputs
    else
      raise ArgumentError, "Unrecognized `function_to_apply` argument: #{function_to_apply}"
    end

    dict_scores =
      scores.to_a.map.with_index do |score, i|
        {label: @model.config.id2label[i], score: score}
      end
    dict_scores.sort_by! { |x| -x[:score] }
    if !top_k.nil?
      dict_scores = dict_scores[...top_k]
    end

    dict_scores
  end

  private

  # Element-wise logistic function over a Numo array.
  def sigmoid(_outputs)
    1.0 / (1.0 + Numo::NMath.exp(-_outputs))
  end

  # Numerically stable softmax along the last axis (max-shifted exponentials).
  def softmax(_outputs)
    maxes = _outputs.max(axis: -1, keepdims: true)
    shifted_exp = Numo::NMath.exp(_outputs - maxes)
    shifted_exp / shifted_exp.sum(axis: -1, keepdims: true)
  end
end
|
110
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Transformers
|
2
|
+
# Feature-extraction pipeline for vision models: returns the model's raw
# output tensor for an image (logits or last_hidden_state) rather than
# classification scores.
class ImageFeatureExtractionPipeline < Pipeline
  # Routes caller kwargs to the [preprocess, forward, postprocess] stages.
  def _sanitize_parameters(image_processor_kwargs: nil, return_tensors: nil, pool: nil, **kwargs)
    preprocess_params = image_processor_kwargs.nil? ? {} : image_processor_kwargs
    preprocess_params[:timeout] = kwargs[:timeout] if kwargs.include?(:timeout)

    postprocess_params = {}
    postprocess_params[:pool] = pool unless pool.nil?
    postprocess_params[:return_tensors] = return_tensors unless return_tensors.nil?

    [preprocess_params, {}, postprocess_params]
  end

  # Loads the image and runs it through the image processor, producing
  # framework-native tensors.
  def preprocess(image, timeout: nil, **image_processor_kwargs)
    loaded = ImageUtils.load_image(image, timeout: timeout)
    inputs = @image_processor.(loaded, return_tensors: @framework, **image_processor_kwargs)
    if @framework == "pt"
      # TODO
      # inputs = inputs.to(torch_dtype)
    end
    inputs
  end

  def _forward(model_inputs)
    @model.(**model_inputs.to_h)
  end

  # Extracts the first available output tensor and converts it to plain Ruby
  # arrays unless the caller asked for the tensor itself.
  def postprocess(model_outputs, pool: nil, return_tensors: false)
    pool = false if pool.nil?

    # Pooling is not implemented yet.
    raise Todo if pool

    # [0] is the first available tensor, logits or last_hidden_state.
    outputs = model_outputs[0]

    return outputs if return_tensors

    if @framework == "pt"
      outputs.to_a
    else
      raise Todo
    end
  end
end
|
56
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Transformers
|
2
|
+
# Map-style dataset adapter: wraps an indexable dataset so each item is run
# through a preprocessing callable (with fixed params) at access time.
class PipelineDataset < Torch::Utils::Data::Dataset
  def initialize(dataset, process, params)
    @dataset = dataset
    @process = process
    @params = params
  end

  # Number of items in the wrapped dataset.
  def size
    @dataset.size
  end

  # Fetches item +i+ and applies the preprocessing callable to it.
  def [](i)
    @process.(@dataset[i], **@params)
  end
end
|
19
|
+
|
20
|
+
# Iterable-style dataset adapter: streams items from a loader through an
# inference callable, supporting both random access and sequential iteration.
class PipelineIterator < Torch::Utils::Data::IterableDataset
  def initialize(loader, infer, params, loader_batch_size: nil)
    @loader = loader
    @infer = infer
    @params = params

    # A loader batch size of 1 gains nothing — deactivate batching entirely.
    loader_batch_size = nil if loader_batch_size == 1
    @loader_batch_size = loader_batch_size

    # Internal bookkeeping (loader-batch unrolling state; unused here so far).
    @loader_batch_index = nil
    @loader_batch_data = nil
  end

  # Number of items the underlying loader reports.
  def size
    @loader.size
  end

  # Random access: run inference on the i-th loader item.
  def [](i)
    @infer.(@loader[i], **@params)
  end

  # Sequentially runs inference over every loader item, yielding each result.
  def each
    @iterator = @loader
    @iterator.each { |item| yield @infer.(item, **@params) }
  end
end
|
53
|
+
end
|