transformers-rb 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +203 -0
  4. data/README.md +163 -0
  5. data/lib/transformers/activations.rb +57 -0
  6. data/lib/transformers/configuration_utils.rb +285 -0
  7. data/lib/transformers/convert_slow_tokenizer.rb +90 -0
  8. data/lib/transformers/data/processors/squad.rb +115 -0
  9. data/lib/transformers/dynamic_module_utils.rb +25 -0
  10. data/lib/transformers/feature_extraction_utils.rb +110 -0
  11. data/lib/transformers/hf_hub/constants.rb +71 -0
  12. data/lib/transformers/hf_hub/errors.rb +11 -0
  13. data/lib/transformers/hf_hub/file_download.rb +764 -0
  14. data/lib/transformers/hf_hub/utils/_errors.rb +94 -0
  15. data/lib/transformers/hf_hub/utils/_headers.rb +109 -0
  16. data/lib/transformers/image_processing_base.rb +169 -0
  17. data/lib/transformers/image_processing_utils.rb +63 -0
  18. data/lib/transformers/image_transforms.rb +208 -0
  19. data/lib/transformers/image_utils.rb +165 -0
  20. data/lib/transformers/modeling_outputs.rb +81 -0
  21. data/lib/transformers/modeling_utils.rb +888 -0
  22. data/lib/transformers/models/auto/auto_factory.rb +138 -0
  23. data/lib/transformers/models/auto/configuration_auto.rb +61 -0
  24. data/lib/transformers/models/auto/feature_extraction_auto.rb +20 -0
  25. data/lib/transformers/models/auto/image_processing_auto.rb +104 -0
  26. data/lib/transformers/models/auto/modeling_auto.rb +80 -0
  27. data/lib/transformers/models/auto/tokenization_auto.rb +160 -0
  28. data/lib/transformers/models/bert/configuration_bert.rb +65 -0
  29. data/lib/transformers/models/bert/modeling_bert.rb +836 -0
  30. data/lib/transformers/models/bert/tokenization_bert.rb +115 -0
  31. data/lib/transformers/models/bert/tokenization_bert_fast.rb +52 -0
  32. data/lib/transformers/models/distilbert/configuration_distilbert.rb +63 -0
  33. data/lib/transformers/models/distilbert/modeling_distilbert.rb +616 -0
  34. data/lib/transformers/models/distilbert/tokenization_distilbert.rb +114 -0
  35. data/lib/transformers/models/distilbert/tokenization_distilbert_fast.rb +71 -0
  36. data/lib/transformers/models/vit/configuration_vit.rb +60 -0
  37. data/lib/transformers/models/vit/image_processing_vit.rb +170 -0
  38. data/lib/transformers/models/vit/modeling_vit.rb +506 -0
  39. data/lib/transformers/pipelines/_init.rb +348 -0
  40. data/lib/transformers/pipelines/base.rb +301 -0
  41. data/lib/transformers/pipelines/feature_extraction.rb +47 -0
  42. data/lib/transformers/pipelines/image_classification.rb +110 -0
  43. data/lib/transformers/pipelines/image_feature_extraction.rb +56 -0
  44. data/lib/transformers/pipelines/pt_utils.rb +53 -0
  45. data/lib/transformers/pipelines/question_answering.rb +508 -0
  46. data/lib/transformers/pipelines/text_classification.rb +123 -0
  47. data/lib/transformers/pipelines/token_classification.rb +282 -0
  48. data/lib/transformers/ruby_utils.rb +33 -0
  49. data/lib/transformers/sentence_transformer.rb +37 -0
  50. data/lib/transformers/tokenization_utils.rb +152 -0
  51. data/lib/transformers/tokenization_utils_base.rb +937 -0
  52. data/lib/transformers/tokenization_utils_fast.rb +386 -0
  53. data/lib/transformers/torch_utils.rb +25 -0
  54. data/lib/transformers/utils/_init.rb +31 -0
  55. data/lib/transformers/utils/generic.rb +107 -0
  56. data/lib/transformers/utils/hub.rb +209 -0
  57. data/lib/transformers/utils/import_utils.rb +45 -0
  58. data/lib/transformers/utils/logging.rb +52 -0
  59. data/lib/transformers/version.rb +3 -0
  60. data/lib/transformers-rb.rb +1 -0
  61. data/lib/transformers.rb +100 -0
  62. data/licenses/LICENSE-huggingface-hub.txt +201 -0
  63. data/licenses/LICENSE-sentence-transformers.txt +201 -0
  64. data/licenses/NOTICE-sentence-transformers.txt +5 -0
  65. metadata +161 -0
@@ -0,0 +1,110 @@
1
+ module Transformers
2
# Names of the functions a classification pipeline can apply to raw model
# outputs before reporting them as scores.
class ClassificationFunction < ExplicitEnum
  # Apply an element-wise sigmoid to the outputs.
  SIGMOID = "sigmoid"

  # Apply a softmax over the outputs.
  SOFTMAX = "softmax"

  # Report the outputs unchanged.
  NONE = "none"
end
7
+
8
# Pipeline that loads an image, runs it through the model and returns the
# labels with their scores, best score first.
class ImageClassificationPipeline < Pipeline
  extend ClassAttribute

  class_attribute :function_to_apply, ClassificationFunction::NONE

  # Builds the pipeline, then checks that the "vision" backend is available
  # and that the model type is one of the image classification models.
  def initialize(*args, **kwargs)
    super(*args, **kwargs)
    Utils.requires_backends(self, "vision")
    check_model_type(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES)
  end

  # Splits the call options into per-stage parameter hashes.
  #
  # Only options that were actually supplied (non-nil) are forwarded, so the
  # defaults declared on #preprocess and #postprocess stay in effect otherwise.
  # A String +function_to_apply+ is downcased and validated through
  # ClassificationFunction.
  #
  # @return [Array<Hash>] [preprocess_params, forward_params, postprocess_params]
  def _sanitize_parameters(top_k: nil, function_to_apply: nil, timeout: nil)
    preprocess_params = {}
    preprocess_params[:timeout] = timeout unless timeout.nil?

    postprocess_params = {}
    postprocess_params[:top_k] = top_k unless top_k.nil?
    if function_to_apply.is_a?(String)
      function_to_apply = ClassificationFunction.new(function_to_apply.downcase).to_s
    end
    postprocess_params[:function_to_apply] = function_to_apply unless function_to_apply.nil?

    [preprocess_params, {}, postprocess_params]
  end

  # Loads the image and runs it through the image processor.
  #
  # @param image the image reference handed to ImageUtils.load_image
  # @param timeout forwarded to ImageUtils.load_image
  # @return the image processor output for the current framework
  def preprocess(image, timeout: nil)
    image = ImageUtils.load_image(image, timeout: timeout)
    # TODO: when @framework == "pt", cast the inputs:
    # model_inputs = model_inputs.to(torch_dtype)
    @image_processor.(image, return_tensors: @framework)
  end

  # Runs the model on the preprocessed inputs.
  def _forward(model_inputs)
    @model.(**model_inputs.to_h)
  end

  # Converts the logits of the first item into a sorted list of label scores.
  #
  # @param model_outputs model outputs, indexed by +:logits+
  # @param function_to_apply one of the ClassificationFunction values; when
  #   nil it is derived from the model config
  # @param top_k maximum number of entries to return; nil returns every label
  # @return [Array<Hash>] +{label:, score:}+ hashes ordered by descending score
  # @raise [ArgumentError] when +function_to_apply+ is not recognized
  def postprocess(model_outputs, function_to_apply: nil, top_k: 5)
    config = @model.config

    if function_to_apply.nil?
      function_to_apply =
        if config.problem_type == "multi_label_classification" || config.num_labels == 1
          ClassificationFunction::SIGMOID
        elsif config.problem_type == "single_label_classification" || config.num_labels > 1
          ClassificationFunction::SOFTMAX
        elsif config.instance_variable_defined?(:@function_to_apply)
          config.function_to_apply
        else
          ClassificationFunction::NONE
        end
    end

    # nil means "no limit"; guard it so the comparison cannot raise on nil.
    top_k = config.num_labels if !top_k.nil? && top_k > config.num_labels

    outputs = model_outputs[:logits][0]
    if @framework == "pt" && [Torch.bfloat16, Torch.float16].include?(outputs.dtype)
      # Half-precision tensors are widened to float32 before the conversion.
      outputs = outputs.to(Torch.float32).numo
    else
      outputs = outputs.numo
    end

    if function_to_apply == ClassificationFunction::SIGMOID
      scores = sigmoid(outputs)
    elsif function_to_apply == ClassificationFunction::SOFTMAX
      scores = softmax(outputs)
    elsif function_to_apply == ClassificationFunction::NONE
      scores = outputs
    else
      raise ArgumentError, "Unrecognized `function_to_apply` argument: #{function_to_apply}"
    end

    dict_scores =
      scores.to_a.each_with_index.map do |score, i|
        {label: config.id2label[i], score: score}
      end
    dict_scores.sort_by! { |entry| -entry[:score] }
    dict_scores = dict_scores[...top_k] unless top_k.nil?

    dict_scores
  end

  private

  # Element-wise logistic function.
  def sigmoid(outputs)
    1.0 / (1.0 + Numo::NMath.exp(-outputs))
  end

  # Softmax over the last axis; the maximum is subtracted before
  # exponentiating to keep the exponentials from overflowing.
  def softmax(outputs)
    maxes = outputs.max(axis: -1, keepdims: true)
    shifted_exp = Numo::NMath.exp(outputs - maxes)
    shifted_exp / shifted_exp.sum(axis: -1, keepdims: true)
  end
end
110
+ end
@@ -0,0 +1,56 @@
1
+ module Transformers
2
# Pipeline that loads an image, runs it through the model and returns the
# first output tensor of the model.
class ImageFeatureExtractionPipeline < Pipeline
  # Splits the call options into per-stage parameter hashes.
  #
  # +image_processor_kwargs+ becomes the preprocess parameters. It is copied
  # first so that adding +:timeout+ never modifies the caller's hash.
  #
  # @return [Array<Hash>] [preprocess_params, forward_params, postprocess_params]
  def _sanitize_parameters(image_processor_kwargs: nil, return_tensors: nil, pool: nil, **kwargs)
    preprocess_params = image_processor_kwargs.nil? ? {} : image_processor_kwargs.dup

    postprocess_params = {}
    postprocess_params[:pool] = pool unless pool.nil?
    postprocess_params[:return_tensors] = return_tensors unless return_tensors.nil?

    preprocess_params[:timeout] = kwargs[:timeout] if kwargs.key?(:timeout)

    [preprocess_params, {}, postprocess_params]
  end

  # Loads the image and runs it through the image processor.
  #
  # @param image the image reference handed to ImageUtils.load_image
  # @param timeout forwarded to ImageUtils.load_image
  # @param image_processor_kwargs extra options forwarded to the image processor
  # @return the image processor output for the current framework
  def preprocess(image, timeout: nil, **image_processor_kwargs)
    image = ImageUtils.load_image(image, timeout: timeout)
    # TODO: when @framework == "pt", cast the inputs:
    # model_inputs = model_inputs.to(torch_dtype)
    @image_processor.(image, return_tensors: @framework, **image_processor_kwargs)
  end

  # Runs the model on the preprocessed inputs.
  def _forward(model_inputs)
    @model.(**model_inputs.to_h)
  end

  # Extracts the features from the model outputs.
  #
  # @param model_outputs model outputs, indexed positionally
  # @param pool pooling is not implemented yet; any truthy value raises Todo
  # @param return_tensors when truthy the tensor is returned as-is, otherwise
  #   it is converted with +to_a+
  # @raise [Todo] when +pool+ is truthy, or when a non-tensor result is
  #   requested for a framework other than "pt"
  def postprocess(model_outputs, pool: nil, return_tensors: false)
    raise Todo if pool

    # [0] is the first available tensor, logits or last_hidden_state.
    outputs = model_outputs[0]
    return outputs if return_tensors
    raise Todo unless @framework == "pt"

    outputs.to_a
  end
end
56
+ end
@@ -0,0 +1,53 @@
1
+ module Transformers
2
# Dataset wrapper that applies a callable to each item on access.
class PipelineDataset < Torch::Utils::Data::Dataset
  # @param dataset the wrapped collection; must respond to +size+ and +[]+
  # @param process callable invoked as +process.(item, **params)+
  # @param params keyword arguments passed to +process+ on every call
  def initialize(dataset, process, params)
    @dataset, @process, @params = dataset, process, params
  end

  # Number of items in the wrapped dataset.
  def size
    @dataset.size
  end

  # Fetches item +i+ from the wrapped dataset and returns it processed.
  def [](i)
    @process.(@dataset[i], **@params)
  end
end
19
+
20
# Iterable wrapper that applies a callable to every item produced by a loader.
class PipelineIterator < Torch::Utils::Data::IterableDataset
  # @param loader the source of items; must respond to +each+, +size+ and +[]+
  # @param infer callable invoked as +infer.(item, **params)+
  # @param params keyword arguments passed to +infer+ on every call
  # @param loader_batch_size stored as nil when it is 1 or nil
  def initialize(loader, infer, params, loader_batch_size: nil)
    @loader = loader
    @infer = infer
    @params = params
    # Let's spare some time by deactivating altogether
    @loader_batch_size = loader_batch_size == 1 ? nil : loader_batch_size

    # Internal bookkeeping
    @loader_batch_index = nil
    @loader_batch_data = nil
  end

  # Number of items in the loader.
  def size
    @loader.size
  end

  # Returns item +i+ of the loader after running it through +infer+.
  def [](i)
    @infer.(@loader[i], **@params)
  end

  # Yields every loader item after running it through +infer+.
  #
  # Without a block an Enumerator is returned, as is conventional for +each+,
  # instead of raising LocalJumpError on the first item.
  def each
    return to_enum(:each) unless block_given?

    @iterator = @loader
    @iterator.each do |item|
      yield @infer.(item, **@params)
    end
  end
end
53
+ end