transformers-rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +203 -0
  4. data/README.md +163 -0
  5. data/lib/transformers/activations.rb +57 -0
  6. data/lib/transformers/configuration_utils.rb +285 -0
  7. data/lib/transformers/convert_slow_tokenizer.rb +90 -0
  8. data/lib/transformers/data/processors/squad.rb +115 -0
  9. data/lib/transformers/dynamic_module_utils.rb +25 -0
  10. data/lib/transformers/feature_extraction_utils.rb +110 -0
  11. data/lib/transformers/hf_hub/constants.rb +71 -0
  12. data/lib/transformers/hf_hub/errors.rb +11 -0
  13. data/lib/transformers/hf_hub/file_download.rb +764 -0
  14. data/lib/transformers/hf_hub/utils/_errors.rb +94 -0
  15. data/lib/transformers/hf_hub/utils/_headers.rb +109 -0
  16. data/lib/transformers/image_processing_base.rb +169 -0
  17. data/lib/transformers/image_processing_utils.rb +63 -0
  18. data/lib/transformers/image_transforms.rb +208 -0
  19. data/lib/transformers/image_utils.rb +165 -0
  20. data/lib/transformers/modeling_outputs.rb +81 -0
  21. data/lib/transformers/modeling_utils.rb +888 -0
  22. data/lib/transformers/models/auto/auto_factory.rb +138 -0
  23. data/lib/transformers/models/auto/configuration_auto.rb +61 -0
  24. data/lib/transformers/models/auto/feature_extraction_auto.rb +20 -0
  25. data/lib/transformers/models/auto/image_processing_auto.rb +104 -0
  26. data/lib/transformers/models/auto/modeling_auto.rb +80 -0
  27. data/lib/transformers/models/auto/tokenization_auto.rb +160 -0
  28. data/lib/transformers/models/bert/configuration_bert.rb +65 -0
  29. data/lib/transformers/models/bert/modeling_bert.rb +836 -0
  30. data/lib/transformers/models/bert/tokenization_bert.rb +115 -0
  31. data/lib/transformers/models/bert/tokenization_bert_fast.rb +52 -0
  32. data/lib/transformers/models/distilbert/configuration_distilbert.rb +63 -0
  33. data/lib/transformers/models/distilbert/modeling_distilbert.rb +616 -0
  34. data/lib/transformers/models/distilbert/tokenization_distilbert.rb +114 -0
  35. data/lib/transformers/models/distilbert/tokenization_distilbert_fast.rb +71 -0
  36. data/lib/transformers/models/vit/configuration_vit.rb +60 -0
  37. data/lib/transformers/models/vit/image_processing_vit.rb +170 -0
  38. data/lib/transformers/models/vit/modeling_vit.rb +506 -0
  39. data/lib/transformers/pipelines/_init.rb +348 -0
  40. data/lib/transformers/pipelines/base.rb +301 -0
  41. data/lib/transformers/pipelines/feature_extraction.rb +47 -0
  42. data/lib/transformers/pipelines/image_classification.rb +110 -0
  43. data/lib/transformers/pipelines/image_feature_extraction.rb +56 -0
  44. data/lib/transformers/pipelines/pt_utils.rb +53 -0
  45. data/lib/transformers/pipelines/question_answering.rb +508 -0
  46. data/lib/transformers/pipelines/text_classification.rb +123 -0
  47. data/lib/transformers/pipelines/token_classification.rb +282 -0
  48. data/lib/transformers/ruby_utils.rb +33 -0
  49. data/lib/transformers/sentence_transformer.rb +37 -0
  50. data/lib/transformers/tokenization_utils.rb +152 -0
  51. data/lib/transformers/tokenization_utils_base.rb +937 -0
  52. data/lib/transformers/tokenization_utils_fast.rb +386 -0
  53. data/lib/transformers/torch_utils.rb +25 -0
  54. data/lib/transformers/utils/_init.rb +31 -0
  55. data/lib/transformers/utils/generic.rb +107 -0
  56. data/lib/transformers/utils/hub.rb +209 -0
  57. data/lib/transformers/utils/import_utils.rb +45 -0
  58. data/lib/transformers/utils/logging.rb +52 -0
  59. data/lib/transformers/version.rb +3 -0
  60. data/lib/transformers-rb.rb +1 -0
  61. data/lib/transformers.rb +100 -0
  62. data/licenses/LICENSE-huggingface-hub.txt +201 -0
  63. data/licenses/LICENSE-sentence-transformers.txt +201 -0
  64. data/licenses/NOTICE-sentence-transformers.txt +5 -0
  65. metadata +161 -0
@@ -0,0 +1,110 @@
1
+ module Transformers
2
  # Enumerates the activation functions that can be applied to a model's
  # logits in ImageClassificationPipeline#postprocess.
  class ClassificationFunction < ExplicitEnum
    SIGMOID = "sigmoid" # element-wise sigmoid (multi-label or single-logit models)
    SOFTMAX = "softmax" # softmax over labels (single-label, multi-class models)
    NONE = "none"       # raw logits, no normalization
  end
7
+
8
+ class ImageClassificationPipeline < Pipeline
9
+ extend ClassAttribute
10
+
11
+ class_attribute :function_to_apply, ClassificationFunction::NONE
12
+
13
+ def initialize(*args, **kwargs)
14
+ super(*args, **kwargs)
15
+ Utils.requires_backends(self, "vision")
16
+ check_model_type(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES)
17
+ end
18
+
19
+ def _sanitize_parameters(top_k: nil, function_to_apply: nil, timeout: nil)
20
+ preprocess_params = {}
21
+ if !timeout.nil?
22
+ preprocess_params[:timeout] = timeout
23
+ end
24
+ postprocess_params = {}
25
+ if !top_k.nil?
26
+ postprocess_params[:top_k] = top_k
27
+ end
28
+ if function_to_apply.is_a?(String)
29
+ function_to_apply = ClassificationFunction.new(function_to_apply.downcase).to_s
30
+ end
31
+ if !function_to_apply.nil?
32
+ postprocess_params[:function_to_apply] = function_to_apply
33
+ end
34
+ [preprocess_params, {}, postprocess_params]
35
+ end
36
+
37
+ def preprocess(image, timeout: nil)
38
+ image = ImageUtils.load_image(image, timeout: timeout)
39
+ model_inputs = @image_processor.(image, return_tensors: @framework)
40
+ if @framework == "pt"
41
+ # TODO
42
+ # model_inputs = model_inputs.to(torch_dtype)
43
+ end
44
+ model_inputs
45
+ end
46
+
47
+ def _forward(model_inputs)
48
+ model_outputs = @model.(**model_inputs.to_h)
49
+ model_outputs
50
+ end
51
+
52
+ def postprocess(model_outputs, function_to_apply: nil, top_k: 5)
53
+ if function_to_apply.nil?
54
+ if @model.config.problem_type == "multi_label_classification" || @model.config.num_labels == 1
55
+ function_to_apply = ClassificationFunction::SIGMOID
56
+ elsif @model.config.problem_type == "single_label_classification" || @model.config.num_labels > 1
57
+ function_to_apply = ClassificationFunction::SOFTMAX
58
+ elsif @model.config.instance_variable_defined?(:@function_to_apply) && function_to_apply.nil?
59
+ function_to_apply = @model.config.function_to_apply
60
+ else
61
+ function_to_apply = ClassificationFunction::NONE
62
+ end
63
+ end
64
+
65
+ if top_k > @model.config.num_labels
66
+ top_k = @model.config.num_labels
67
+ end
68
+
69
+ outputs = model_outputs[:logits][0]
70
+ if @framework == "pt" && [Torch.bfloat16, Torch.float16].include?(outputs.dtype)
71
+ outputs = outputs.to(Torch.float32).numo
72
+ else
73
+ outputs = outputs.numo
74
+ end
75
+
76
+ if function_to_apply == ClassificationFunction::SIGMOID
77
+ scores = sigmoid(outputs)
78
+ elsif function_to_apply == ClassificationFunction::SOFTMAX
79
+ scores = softmax(outputs)
80
+ elsif function_to_apply == ClassificationFunction::NONE
81
+ scores = outputs
82
+ else
83
+ raise ArgumentError, "Unrecognized `function_to_apply` argument: #{function_to_apply}"
84
+ end
85
+
86
+ dict_scores =
87
+ scores.to_a.map.with_index do |score, i|
88
+ {label: @model.config.id2label[i], score: score}
89
+ end
90
+ dict_scores.sort_by! { |x| -x[:score] }
91
+ if !top_k.nil?
92
+ dict_scores = dict_scores[...top_k]
93
+ end
94
+
95
+ dict_scores
96
+ end
97
+
98
+ private
99
+
100
+ def sigmoid(_outputs)
101
+ 1.0 / (1.0 + Numo::NMath.exp(-_outputs))
102
+ end
103
+
104
+ def softmax(_outputs)
105
+ maxes = _outputs.max(axis: -1, keepdims: true)
106
+ shifted_exp = Numo::NMath.exp(_outputs - maxes)
107
+ shifted_exp / shifted_exp.sum(axis: -1, keepdims: true)
108
+ end
109
+ end
110
+ end
@@ -0,0 +1,56 @@
1
+ module Transformers
2
+ class ImageFeatureExtractionPipeline < Pipeline
3
+ def _sanitize_parameters(image_processor_kwargs: nil, return_tensors: nil, pool: nil, **kwargs)
4
+ preprocess_params = image_processor_kwargs.nil? ? {} : image_processor_kwargs
5
+
6
+ postprocess_params = {}
7
+ if !pool.nil?
8
+ postprocess_params[:pool] = pool
9
+ end
10
+ if !return_tensors.nil?
11
+ postprocess_params[:return_tensors] = return_tensors
12
+ end
13
+
14
+ if kwargs.include?(:timeout)
15
+ preprocess_params[:timeout] = kwargs[:timeout]
16
+ end
17
+
18
+ [preprocess_params, {}, postprocess_params]
19
+ end
20
+
21
+ def preprocess(image, timeout: nil, **image_processor_kwargs)
22
+ image = ImageUtils.load_image(image, timeout: timeout)
23
+ model_inputs = @image_processor.(image, return_tensors: @framework, **image_processor_kwargs)
24
+ if @framework == "pt"
25
+ # TODO
26
+ # model_inputs = model_inputs.to(torch_dtype)
27
+ end
28
+ model_inputs
29
+ end
30
+
31
+ def _forward(model_inputs)
32
+ model_outputs = @model.(**model_inputs.to_h)
33
+ model_outputs
34
+ end
35
+
36
+ def postprocess(model_outputs, pool: nil, return_tensors: false)
37
+ pool = !pool.nil? ? pool : false
38
+
39
+ if pool
40
+ raise Todo
41
+ else
42
+ # [0] is the first available tensor, logits or last_hidden_state.
43
+ outputs = model_outputs[0]
44
+ end
45
+
46
+ if return_tensors
47
+ return outputs
48
+ end
49
+ if @framework == "pt"
50
+ outputs.to_a
51
+ else
52
+ raise Todo
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,53 @@
1
+ module Transformers
2
+ class PipelineDataset < Torch::Utils::Data::Dataset
3
+ def initialize(dataset, process, params)
4
+ @dataset = dataset
5
+ @process = process
6
+ @params = params
7
+ end
8
+
9
+ def size
10
+ @dataset.size
11
+ end
12
+
13
+ def [](i)
14
+ item = @dataset[i]
15
+ processed = @process.(item, **@params)
16
+ processed
17
+ end
18
+ end
19
+
20
+ class PipelineIterator < Torch::Utils::Data::IterableDataset
21
+ def initialize(loader, infer, params, loader_batch_size: nil)
22
+ @loader = loader
23
+ @infer = infer
24
+ @params = params
25
+ if loader_batch_size == 1
26
+ # Let's spare some time by deactivating altogether
27
+ loader_batch_size = nil
28
+ end
29
+ @loader_batch_size = loader_batch_size
30
+
31
+ # Internal bookkeeping
32
+ @loader_batch_index = nil
33
+ @loader_batch_data = nil
34
+ end
35
+
36
+ def size
37
+ @loader.size
38
+ end
39
+
40
+ def [](i)
41
+ @infer.(@loader[i], **@params)
42
+ end
43
+
44
+ def each
45
+ @iterator = @loader
46
+
47
+ @iterator.each do |item|
48
+ processed = @infer.(item, **@params)
49
+ yield processed
50
+ end
51
+ end
52
+ end
53
+ end