wiktionary 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Readme.md +64 -0
- data/lib/wiktionary/noun.rb +17 -3
- data/spec/noun.rb +2 -0
- data/wiktionary.gemspec +25 -0
- metadata +20 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 091eb3a349dd479feed4637a5beaef0900cea651
|
4
|
+
data.tar.gz: ca129fad7fefe18ee4d823d8ae0d94f5e5258536
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d38ae673dcef87169985be6d6f2a189ae588f7d7a27df4e8a8e6944ec8d74394f6969797bd14100cdd230ac624ef164204067f00c6ae9a9544228ffb03111fe
|
7
|
+
data.tar.gz: a93e59d7997fc418488a6deddb22223bd021a287fa5c387b8588ff0f1a6908800256aba7fd77da236260021aa98a0ee58048edd98b0a00918def7dcbe2d3ccbd
|
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
*.gem
|
data/Readme.md
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Word conversion rules from Wiktionary
|
2
|
+
|
3
|
+
The aim of the project is to provide rules for converting English words and basic
|
4
|
+
information about the words' morphology. The project is based on the English
|
5
|
+
Wiktionary, thus the descriptions are pretty accurate and include most of the
|
6
|
+
exceptions (e.g. ox -> oxen).
|
7
|
+
|
8
|
+
## Basic usage
|
9
|
+
|
10
|
+
Installation:
|
11
|
+
|
12
|
+
```
|
13
|
+
$ gem install wiktionary
|
14
|
+
```
|
15
|
+
|
16
|
+
So far only nouns are supported.
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
require 'wiktionary/noun'
|
20
|
+
|
21
|
+
nouns = Wiktionary::Noun.new
|
22
|
+
nouns.singularize("dogs") #=> ["dog"]
|
23
|
+
nouns.singularize("oxen") #=> ["ox"]
|
24
|
+
nouns.singularize("feet") #=> ["foot"]
|
25
|
+
|
26
|
+
nouns.singular?("dog") #=> true
|
27
|
+
nouns.singular?("dogs") #=> false
|
28
|
+
|
29
|
+
|
30
|
+
nouns.plural?("dog") #=> false
|
31
|
+
nouns.plural?("dogs") #=> true
|
32
|
+
```
|
33
|
+
|
34
|
+
The `singularize` method returns an array since there might be more than one
|
35
|
+
base form of a given plural word.
|
36
|
+
|
37
|
+
## Data
|
38
|
+
|
39
|
+
The `data` directory contains lists and mappings of noun forms (plural,
|
40
|
+
singular, countable, etc.). They are used to identify plural forms and
|
41
|
+
transform plural to singular. Generated from Wiktionary dump using
|
42
|
+
https://github.com/djstrong/nouns-with-plurals.
|
43
|
+
|
44
|
+
The following files are available:
|
45
|
+
* `noun.csv` - countable nouns
|
46
|
+
* `noun_countable_and_uncountable.csv` - e.g. http://en.wiktionary.org/wiki/beers
|
47
|
+
* `noun_uncountable.csv` - nouns that cannot be used freely with numbers or
|
48
|
+
the indefinite article, and which therefore take no plural form, e.g.
|
49
|
+
http://en.wiktionary.org/wiki/lycra
|
50
|
+
* `noun_usually_uncountable.csv` - e.g. http://en.wiktionary.org/wiki/information
|
51
|
+
* `noun_unknown.csv` - nouns with unknown or uncertain plural
|
52
|
+
* `noun_pluralia_tantum.csv` - nouns that do not have singular forms, e.g. http://en.wiktionary.org/wiki/scissors
|
53
|
+
* `noun_non_attested.csv` - nouns with plural not attested
|
54
|
+
|
55
|
+
The following files contain two columns (singular and
|
56
|
+
plural form):
|
57
|
+
* `noun.csv`
|
58
|
+
* `noun_countable_and_uncountable.csv`
|
59
|
+
* `noun_usually_uncountable.csv`
|
60
|
+
|
61
|
+
## Credits
|
62
|
+
|
63
|
+
Krzysztof Wróbel (djstrong)
|
64
|
+
Aleksander Smywiński-Pohl (apohllo)
|
data/lib/wiktionary/noun.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
require 'csv'
|
3
|
+
require 'unicode_utils'
|
2
4
|
|
3
5
|
module Wiktionary
|
4
6
|
class Noun
|
@@ -39,7 +41,16 @@ module Wiktionary
|
|
39
41
|
|
40
42
|
# Returns list of possible singular forms of noun.
|
41
43
|
def singularize(noun)
|
42
|
-
|
44
|
+
first_capital = false
|
45
|
+
if noun =~ /^\p{Lu}/
|
46
|
+
noun = UnicodeUtils.downcase(noun[0]) + noun[1..-1]
|
47
|
+
first_capital = true
|
48
|
+
end
|
49
|
+
result = @plural_to_singulars[noun]
|
50
|
+
if first_capital
|
51
|
+
result.map!{|word| UnicodeUtils.upcase(word[0]) + word[1..-1] }
|
52
|
+
end
|
53
|
+
result
|
43
54
|
end
|
44
55
|
|
45
56
|
private
|
@@ -70,13 +81,16 @@ module Wiktionary
|
|
70
81
|
end
|
71
82
|
|
72
83
|
def add(singular,plurals)
|
84
|
+
singular = UnicodeUtils.downcase(singular)
|
85
|
+
plurals = plurals.map{|pl| UnicodeUtils.downcase(pl) }
|
73
86
|
plurals.each do |plural|
|
74
|
-
@plural_to_singulars[plural] << singular
|
75
|
-
@singular_to_plurals[singular] << plural
|
87
|
+
@plural_to_singulars[plural] << singular unless @plural_to_singulars[plural].include?(singular)
|
88
|
+
@singular_to_plurals[singular] << plural unless @singular_to_plurals[singular].include?(plural)
|
76
89
|
end
|
77
90
|
end
|
78
91
|
|
79
92
|
def add_uncountable(singular)
|
93
|
+
singular = UnicodeUtils.downcase(singular)
|
80
94
|
@singular_to_plurals[singular] ||= []
|
81
95
|
end
|
82
96
|
end
|
data/spec/noun.rb
CHANGED
@@ -25,5 +25,7 @@ RSpec.describe Wiktionary::Noun do
|
|
25
25
|
expect(@nouns.singularize("politics")).to include "politics"
|
26
26
|
expect(@nouns.singularize("oxen")).to include "ox"
|
27
27
|
expect(@nouns.singularize("feet")).to include "foot"
|
28
|
+
expect(@nouns.singularize("Dogs")).to include "Dog"
|
29
|
+
expect(@nouns.singularize("Γ rays")).to include "Γ ray"
|
28
30
|
end
|
29
31
|
end
|
data/wiktionary.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = "wiktionary"
|
3
|
+
s.version = "0.1.1"
|
4
|
+
s.date = "#{Time.now.strftime("%Y-%m-%d")}"
|
5
|
+
s.required_ruby_version = '>= 2.0.0'
|
6
|
+
s.authors = ['Krzysztof Wróbel','Aleksander Smywiński-Pohl']
|
7
|
+
s.email = ["djstrong@gmail.com","apohllo@o2.pl"]
|
8
|
+
s.homepage = "http://github.com/cycloped-io/wiktionary"
|
9
|
+
s.summary = "Word morphology and conversion based on Wiktionary"
|
10
|
+
s.description = "English words morphological description and basic conversion rules based on the English Wiktionary."
|
11
|
+
s.license = "http://opensource.org/licenses/MIT"
|
12
|
+
|
13
|
+
s.rubyforge_project = "wiktionary"
|
14
|
+
s.rdoc_options = ["--main", "Readme.md"]
|
15
|
+
|
16
|
+
s.files = `git ls-files`.split("\n")
|
17
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
|
+
s.require_path = "lib"
|
19
|
+
|
20
|
+
s.add_dependency("unicode_utils")
|
21
|
+
|
22
|
+
s.add_development_dependency("rspec", [">= 3.2.0","< 4.0.0"])
|
23
|
+
s.add_development_dependency("rake", [">= 10.4.0","< 11.0.0"])
|
24
|
+
end
|
25
|
+
|
metadata
CHANGED
@@ -1,16 +1,30 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wiktionary
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
7
|
+
- Krzysztof Wróbel
|
8
8
|
- Aleksander Smywiński-Pohl
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-08-
|
12
|
+
date: 2015-08-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: unicode_utils
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ! '>='
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ! '>='
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
14
28
|
- !ruby/object:Gem::Dependency
|
15
29
|
name: rspec
|
16
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -60,7 +74,9 @@ executables: []
|
|
60
74
|
extensions: []
|
61
75
|
extra_rdoc_files: []
|
62
76
|
files:
|
77
|
+
- .gitignore
|
63
78
|
- .rspec
|
79
|
+
- Readme.md
|
64
80
|
- data/noun.csv
|
65
81
|
- data/noun_countable_and_uncountable.csv
|
66
82
|
- data/noun_non_attested.csv
|
@@ -72,6 +88,7 @@ files:
|
|
72
88
|
- lib/wiktionary/noun.rb
|
73
89
|
- spec/noun.rb
|
74
90
|
- spec/spec_helper.rb
|
91
|
+
- wiktionary.gemspec
|
75
92
|
homepage: http://github.com/cycloped-io/wiktionary
|
76
93
|
licenses:
|
77
94
|
- http://opensource.org/licenses/MIT
|