torchtext 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/lib/torchtext.rb +19 -0
- data/lib/torchtext/data/utils.rb +60 -0
- data/lib/torchtext/datasets/text_classification.rb +166 -0
- data/lib/torchtext/datasets/text_classification_dataset.rb +29 -0
- data/lib/torchtext/version.rb +3 -0
- data/lib/torchtext/vocab.rb +87 -0
- metadata +107 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 86469f8148e519b940a643f81b5317d3e180d6ebc031da14cb0599b48e3f6556
|
4
|
+
data.tar.gz: 499079c8a32de3ea6704b58a04ad8511f7a6784cc138b08e87696c69d7835863
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e3ea0d3719d35a58b757ac3d11adeda30912f35f69f7de37047ef702c556e5384862f950e055565db8396d8495a760b4919fd416affbdc0fd815dc14ed02e3a3
|
7
|
+
data.tar.gz: 16d2817864dc4bba2d54ca4a7288bc609b95ab5d59da51c67eccf70107fd78e67af141b765d01b8eea82c0a112b3dc779c174d7864afee6fb5651a5d787df7c5
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
BSD 3-Clause License
|
2
|
+
|
3
|
+
Copyright (c) James Bradbury and Soumith Chintala 2016,
|
4
|
+
Copyright (c) Andrew Kane 2020,
|
5
|
+
All rights reserved.
|
6
|
+
|
7
|
+
Redistribution and use in source and binary forms, with or without
|
8
|
+
modification, are permitted provided that the following conditions are met:
|
9
|
+
|
10
|
+
* Redistributions of source code must retain the above copyright notice, this
|
11
|
+
list of conditions and the following disclaimer.
|
12
|
+
|
13
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
14
|
+
this list of conditions and the following disclaimer in the documentation
|
15
|
+
and/or other materials provided with the distribution.
|
16
|
+
|
17
|
+
* Neither the name of the copyright holder nor the names of its
|
18
|
+
contributors may be used to endorse or promote products derived from
|
19
|
+
this software without specific prior written permission.
|
20
|
+
|
21
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
22
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
23
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
24
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
25
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
26
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
27
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
28
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
29
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
30
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# TorchText
|
2
|
+
|
3
|
+
:fire: Data loaders and abstractions for text and NLP - for Ruby
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application’s Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'torchtext'
|
11
|
+
```
|
12
|
+
|
13
|
+
## Getting Started
|
14
|
+
|
15
|
+
This library follows the [Python API](https://pytorch.org/text/). Many methods and options are missing at the moment. PRs welcome!
|
16
|
+
|
17
|
+
## Examples
|
18
|
+
|
19
|
+
Text classification
|
20
|
+
|
21
|
+
- [PyTorch tutorial](https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html)
|
22
|
+
- [Ruby code](examples/text_classification)
|
23
|
+
|
24
|
+
## Datasets
|
25
|
+
|
26
|
+
Load a dataset
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
train_dataset, test_dataset = TorchText::Datasets::AG_NEWS.load(root: ".data", ngrams: 2)
|
30
|
+
```
|
31
|
+
|
32
|
+
Supported datasets are:
|
33
|
+
|
34
|
+
- [AG_NEWS](http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html)
|
35
|
+
|
36
|
+
## Disclaimer
|
37
|
+
|
38
|
+
This library downloads and prepares public datasets. We don’t host any datasets. Be sure to adhere to the license for each dataset.
|
39
|
+
|
40
|
+
If you’re a dataset owner and wish to update any details or remove it from this project, let us know.
|
41
|
+
|
42
|
+
## History
|
43
|
+
|
44
|
+
View the [changelog](https://github.com/ankane/torchtext/blob/master/CHANGELOG.md)
|
45
|
+
|
46
|
+
## Contributing
|
47
|
+
|
48
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
49
|
+
|
50
|
+
- [Report bugs](https://github.com/ankane/torchtext/issues)
|
51
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/torchtext/pulls)
|
52
|
+
- Write, clarify, or fix documentation
|
53
|
+
- Suggest or add new features
|
54
|
+
|
55
|
+
To get started with development:
|
56
|
+
|
57
|
+
```sh
|
58
|
+
git clone https://github.com/ankane/torchtext.git
|
59
|
+
cd torchtext
|
60
|
+
bundle install
|
61
|
+
bundle exec rake test
|
62
|
+
```
|
data/lib/torchtext.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# dependencies
require "torch"

# stdlib
require "csv"
require "fileutils"
require "net/http"
require "rubygems/package"
require "set"
require "tmpdir"

# modules
require "torchtext/data/utils"
require "torchtext/datasets/text_classification"
require "torchtext/datasets/text_classification_dataset"
require "torchtext/vocab"
require "torchtext/version"
|
16
|
+
|
17
|
+
module TorchText
  # Library-specific error type; a direct subclass of StandardError.
  Error = Class.new(StandardError)
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module TorchText
  module Data
    # Tokenizer lookup and n-gram helpers.
    module Utils
      # Returns a callable tokenizer.
      #
      # @param tokenizer [String, nil] nil selects plain whitespace splitting;
      #   "basic_english" selects the normalizing English tokenizer
      # @param language [String] must be "en" when tokenizer is "basic_english"
      # @return [Method] responds to #call(String) and returns an Array of Strings
      # @raise [ArgumentError] if "basic_english" is requested for another language
      # @raise [RuntimeError] for any other tokenizer name
      def tokenizer(tokenizer, language: "en")
        return method(:split_tokenizer) if tokenizer.nil?

        if tokenizer == "basic_english"
          if language != "en"
            raise ArgumentError, "Basic normalization is only available for English(en)"
          end
          return method(:basic_english_normalize)
        end

        raise "Not implemented yet"
      end

      # Yields every token of token_list, then every space-joined n-gram for
      # n = 2..ngrams (all 2-grams first, then all 3-grams, and so on).
      #
      # @param token_list [Array<String>] tokens in order
      # @param ngrams [Integer] largest n-gram size; 1 yields the tokens only
      # @return [Enumerator] when called without a block
      def ngrams_iterator(token_list, ngrams)
        return enum_for(:ngrams_iterator, token_list, ngrams) unless block_given?

        token_list.each { |token| yield token }

        2.upto(ngrams) do |n|
          # each_cons yields nothing when n exceeds the number of tokens
          token_list.each_cons(n) { |gram| yield gram.join(" ") }
        end
      end

      private

      # Splits on runs of whitespace.
      def split_tokenizer(x)
        x.split
      end

      # Ordered [pattern, replacement] pairs applied by basic_english_normalize.
      # None of the replacements contains a backslash, so they are inserted literally.
      PATTERNS_DICT = [
        [%r{\'}, " ' "],
        [%r{\"}, ""],
        [%r{\.}, " . "],
        [%r{<br \/>}, " "],
        [%r{,}, " , "],
        [%r{\(}, " ( "],
        [%r{\)}, " ) "],
        [%r{\!}, " ! "],
        [%r{\?}, " ? "],
        [%r{\;}, " "],
        [%r{\:}, " "],
        [%r{\s+}, " "]
      ].freeze

      # Lowercases the line, applies every PATTERNS_DICT rule to ALL of its
      # matches, and splits the result on whitespace.
      #
      # The previous implementation used sub!, which rewrites only the first
      # match of each pattern, so a second comma or period stayed attached
      # to its word.
      #
      # @param line [String] raw text; not modified
      # @return [Array<String>] normalized tokens
      def basic_english_normalize(line)
        normalized = PATTERNS_DICT.reduce(line.downcase) do |text, (pattern_re, replaced_str)|
          text.gsub(pattern_re, replaced_str)
        end
        normalized.split
      end

      extend self
    end

    # TODO only tokenizer method
    extend Utils
  end
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
module TorchText
  module Datasets
    # Downloads, extracts and indexes text classification corpora.
    module TextClassification
      URLS = {
        "AG_NEWS" => "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbUDNpeUdjb0wxRms"
      }
      PATHS = {
        "AG_NEWS" => "ag_news_csv"
      }
      FILENAMES = {
        "AG_NEWS" => "ag_news_csv.tar.gz"
      }

      # Upper bound on HTTP redirects followed for a single download.
      MAX_REDIRECTS = 10

      class << self
        # Loads the AG_NEWS corpus.
        #
        # Accepts the keyword arguments of setup_datasets (root, ngrams, vocab,
        # include_unk) and returns [train_dataset, test_dataset].
        def ag_news(*args, **kwargs)
          setup_datasets("AG_NEWS", *args, **kwargs)
        end

        private

        # Downloads and extracts the named corpus, builds a vocabulary from
        # the training CSV unless one is supplied, and converts both splits.
        #
        # @param dataset_name [String] key into URLS / PATHS / FILENAMES
        # @param root [String] directory for the archive and extracted files
        # @param ngrams [Integer] largest n-gram size fed to the vocabulary
        # @param vocab [Vocab, nil] existing vocabulary to reuse
        # @param include_unk [Boolean] keep ids of unknown tokens in the data
        # @return [Array<TextClassificationDataset>] [train, test]
        # @raise [ArgumentError] if vocab is not a Vocab or the label sets differ
        # @raise [Error] if the extracted archive lacks train.csv or test.csv
        def setup_datasets(dataset_name, root: ".data", ngrams: 1, vocab: nil, include_unk: false)
          dataset_tar = download_from_url(URLS[dataset_name], root: root, filename: FILENAMES[dataset_name])
          to_path = extract_archive(dataset_tar)
          extracted_files = Dir["#{to_path}/#{PATHS[dataset_name]}/*"]

          train_csv_path = extracted_files.select { |fname| fname.end_with?("train.csv") }.last
          test_csv_path = extracted_files.select { |fname| fname.end_with?("test.csv") }.last

          # fail with a clear message instead of a TypeError from CSV.foreach(nil)
          if train_csv_path.nil? || test_csv_path.nil?
            raise Error, "Could not find train.csv and test.csv in #{to_path}/#{PATHS[dataset_name]}"
          end

          if vocab.nil?
            vocab = Vocab.build_vocab_from_iterator(_csv_iterator(train_csv_path, ngrams))
          else
            unless vocab.is_a?(Vocab)
              raise ArgumentError, "Passed vocabulary is not of type Vocab"
            end
          end
          train_data, train_labels = _create_data_from_iterator(vocab, _csv_iterator(train_csv_path, ngrams, yield_cls: true), include_unk)
          test_data, test_labels = _create_data_from_iterator(vocab, _csv_iterator(test_csv_path, ngrams, yield_cls: true), include_unk)
          unless (train_labels ^ test_labels).empty?
            raise ArgumentError, "Training and test labels don't match"
          end

          [
            TextClassificationDataset.new(vocab, train_data, train_labels),
            TextClassificationDataset.new(vocab, test_data, test_labels)
          ]
        end

        # Reads a CSV whose first column is a 1-based class id and whose
        # remaining columns are text. Yields an n-gram enumerator per row,
        # preceded by the 0-based class id when yield_cls is true.
        #
        # @return [Enumerator] when called without a block
        def _csv_iterator(data_path, ngrams, yield_cls: false)
          return enum_for(:_csv_iterator, data_path, ngrams, yield_cls: yield_cls) unless block_given?

          tokenizer = Data.tokenizer("basic_english")
          CSV.foreach(data_path) do |row|
            tokens = row[1..-1].join(" ")
            tokens = tokenizer.call(tokens)
            if yield_cls
              yield row[0].to_i - 1, Data::Utils.ngrams_iterator(tokens, ngrams)
            else
              yield Data::Utils.ngrams_iterator(tokens, ngrams)
            end
          end
        end

        # Converts (class, tokens) pairs into [class, id tensor] pairs.
        #
        # When include_unk is false, ids equal to the vocabulary's id for
        # Vocab::UNK are dropped. The previous code compared each integer id
        # with the string Vocab::UNK, which is never equal, so nothing was
        # ever filtered.
        #
        # @return [Array] [data, labels] where labels is a Set of the classes seen
        def _create_data_from_iterator(vocab, iterator, include_unk)
          data = []
          labels = Set.new
          unk_id = include_unk ? nil : vocab[Vocab::UNK]

          iterator.each do |cls, tokens|
            token_ids = tokens.map { |token| vocab[token] }
            token_ids = token_ids.reject { |id| id == unk_id } unless include_unk
            data << [cls, Torch.tensor(token_ids)]
            labels << cls
          end

          [data, labels]
        end

        # Returns the local path of the archive, downloading it first unless
        # a file already exists at root/filename.
        # extra filename parameter
        def download_from_url(url, root:, filename:)
          path = File.join(root, filename)
          return path if File.exist?(path)

          FileUtils.mkdir_p(root)

          puts "Downloading #{url}..."
          download_url_to_file(url, path)
        end

        # Streams url into a temporary file and moves it to dst.
        # Follows redirects, at most redirect_limit of them.
        #
        # @return [String] dst
        # @raise [Error] on a non-success response, a redirect without a
        #   Location header, or too many redirects
        def download_url_to_file(url, dst, redirect_limit: MAX_REDIRECTS)
          tmp = nil
          location = nil
          raise Error, "Too many redirects" if redirect_limit < 0

          uri = URI(url)

          Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
            request = Net::HTTP::Get.new(uri)

            http.request(request) do |response|
              case response
              when Net::HTTPRedirection
                target = response["location"]
                raise Error, "Bad response: redirect without location" if target.nil?
                # resolve relative Location headers against the current URL
                location = uri.merge(target).to_s
              when Net::HTTPSuccess
                tmp = File.join(Dir.tmpdir, "torchtext-#{Process.pid}-#{Time.now.to_f}")
                File.open(tmp, "wb") do |f|
                  response.read_body do |chunk|
                    f.write(chunk)
                  end
                end
              else
                raise Error, "Bad response: #{response.code}"
              end
            end
          end

          if location
            download_url_to_file(location, dst, redirect_limit: redirect_limit - 1)
          else
            FileUtils.mv(tmp, dst)
            dst
          end
        ensure
          # removes a partial download on failure; no-op after a successful move
          FileUtils.rm_f(tmp) if tmp
        end

        # Extracts a .tar.gz / .tgz archive into to_path (default: the
        # archive's directory). The overwrite argument is accepted but unused.
        # extract_tar_gz doesn't list files, so just return to_path
        #
        # @raise [RuntimeError] for any other archive extension
        def extract_archive(from_path, to_path: nil, overwrite: nil)
          to_path ||= File.dirname(from_path)

          if from_path.end_with?(".tar.gz") || from_path.end_with?(".tgz")
            File.open(from_path, "rb") do |io|
              Gem::Package.new("").extract_tar_gz(io, to_path)
            end
            return to_path
          end

          raise "Not implemented yet"
        end
      end

      DATASETS = {
        "AG_NEWS" => method(:ag_news)
      }

      LABELS = {
        "AG_NEWS" => {
          0 => "World",
          1 => "Sports",
          2 => "Business",
          3 => "Sci/Tech"
        }
      }
    end

    # Entry point used as TorchText::Datasets::AG_NEWS.load(root: ..., ngrams: ...).
    class AG_NEWS
      def self.load(*args, **kwargs)
        TextClassification.ag_news(*args, **kwargs)
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module TorchText
  module Datasets
    # Dataset wrapper around an in-memory list of examples, together with the
    # label set and vocabulary it was built with.
    class TextClassificationDataset < Torch::Utils::Data::Dataset
      attr_reader :labels, :vocab

      # @param vocab [Object] vocabulary used to index the examples
      # @param data [Array] examples; must respond to #[], #length and #each
      # @param labels [Object] labels present in data
      def initialize(vocab, data, labels)
        super()
        @data = data
        @labels = labels
        @vocab = vocab
      end

      # Returns the example at index i.
      def [](i)
        @data[i]
      end

      # Number of examples.
      def length
        @data.length
      end
      alias_method :size, :length

      # Yields each example in order.
      #
      # @return [Enumerator] when called without a block (previously this
      #   raised LocalJumpError on the first example)
      def each
        return enum_for(:each) unless block_given?

        @data.each do |x|
          yield x
        end
      end
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
module TorchText
  # Maps tokens to integer ids, ordered by descending frequency and then
  # alphabetically, with the special tokens placed first or last.
  class Vocab
    UNK = "<unk>"

    # @param counter [Hash{String=>Integer}] token frequencies; not modified
    # @param max_size [Integer, nil] maximum number of non-special tokens when
    #   specials_first is true; nil for no limit
    # @param min_freq [Integer] minimum frequency to include a token (values
    #   below 1 are treated as 1)
    # @param specials [Array<String>] special tokens; not modified
    # @param vectors [Object, nil] not supported yet, must be nil
    # @param unk_init [Object, nil] must be nil while vectors is nil
    # @param vectors_cache [Object, nil] must be nil while vectors is nil
    # @param specials_first [Boolean] put specials before (true) or after
    #   (false) the regular tokens
    # @raise [RuntimeError] if vectors, unk_init or vectors_cache is given
    def initialize(
      counter, max_size: nil, min_freq: 1, specials: ["<unk>", "<pad>"],
      vectors: nil, unk_init: nil, vectors_cache: nil, specials_first: true
    )

      @freqs = counter
      counter = counter.dup
      min_freq = [min_freq, 1].max

      @itos = []
      @unk_index = nil

      if specials_first
        # copy so that appending words below does not mutate the caller's array
        @itos = specials.dup
        # only extend max size if specials are prepended
        max_size += specials.size if max_size
      end

      # frequencies of special tokens are not counted when building vocabulary
      # in frequency order
      specials.each do |tok|
        counter.delete(tok)
      end

      # sort by frequency, then alphabetically
      words_and_frequencies = counter.sort_by { |k, v| [-v, k] }

      words_and_frequencies.each do |word, freq|
        break if freq < min_freq || @itos.length == max_size
        @itos << word
      end

      if specials.include?(UNK) # hard-coded for now
        unk_index = specials.index(UNK) # position in list
        # account for ordering of specials, set variable
        @unk_index = specials_first ? unk_index : @itos.length + unk_index
        @stoi = Hash.new(@unk_index)
      else
        @stoi = {}
      end

      unless specials_first
        @itos.concat(specials)
      end

      # stoi is simply a reverse dict for itos
      @itos.each_with_index do |tok, i|
        @stoi[tok] = i
      end

      @vectors = nil
      if !vectors.nil?
        # self.load_vectors(vectors, unk_init=unk_init, cache=vectors_cache)
        raise "Not implemented yet"
      else
        raise "Failed assertion" unless unk_init.nil?
        raise "Failed assertion" unless vectors_cache.nil?
      end
    end

    # Returns the id of token, or the id of UNK for an unknown token.
    # When UNK is not among the specials, unknown tokens map to nil.
    #
    # The fallback is computed lazily: the previous eager fetch(UNK) raised
    # KeyError on every lookup, even of known tokens, whenever UNK was absent.
    def [](token)
      @stoi.fetch(token) { @stoi[UNK] }
    end

    # Total number of entries, specials included.
    def length
      @itos.length
    end
    alias_method :size, :length

    # Counts tokens across an iterator of token collections and builds a
    # Vocab with default options. Prints progress every 10000 collections.
    #
    # @param iterator [#each] yields objects that respond to #each with tokens
    # @return [Vocab]
    def self.build_vocab_from_iterator(iterator)
      counter = Hash.new(0)
      i = 0
      iterator.each do |tokens|
        tokens.each do |token|
          counter[token] += 1
        end
        i += 1
        puts "Processed #{i}" if i % 10000 == 0
      end
      Vocab.new(counter)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: torchtext
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew Kane
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-08-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: torch-rb
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.3.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.3.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '5'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '5'
|
69
|
+
description:
|
70
|
+
email: andrew@chartkick.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- CHANGELOG.md
|
76
|
+
- LICENSE.txt
|
77
|
+
- README.md
|
78
|
+
- lib/torchtext.rb
|
79
|
+
- lib/torchtext/data/utils.rb
|
80
|
+
- lib/torchtext/datasets/text_classification.rb
|
81
|
+
- lib/torchtext/datasets/text_classification_dataset.rb
|
82
|
+
- lib/torchtext/version.rb
|
83
|
+
- lib/torchtext/vocab.rb
|
84
|
+
homepage: https://github.com/ankane/torchtext
|
85
|
+
licenses:
|
86
|
+
- BSD-3-Clause
|
87
|
+
metadata: {}
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options: []
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '2.5'
|
97
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
requirements: []
|
103
|
+
rubygems_version: 3.1.2
|
104
|
+
signing_key:
|
105
|
+
specification_version: 4
|
106
|
+
summary: Data loaders and abstractions for text and NLP
|
107
|
+
test_files: []
|