wiktionary 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
- metadata.gz: 8ab8f85d72bf98e429758c1fa47c139028fc8a14
4
- data.tar.gz: 9c926d3537990a2034ad24203cbdbe09f8dbb961
3
+ metadata.gz: 091eb3a349dd479feed4637a5beaef0900cea651
4
+ data.tar.gz: ca129fad7fefe18ee4d823d8ae0d94f5e5258536
5
5
  SHA512:
6
- metadata.gz: e841cd75cbe30ba85c2f2824a95bbee790363595284e79d0b8072d079f17b01fa20e0784965420225fde74c85e6baeab9282a680fac5cec0e2f217b8e27c69cc
7
- data.tar.gz: f94050c1d8f61a2ec7fd4dac52fbbaf72386a243a9aaf1365dd208718eba484653de839e3cb34b2dec51ad3aeebe7cde861ee31fa889786423c527a9cd569666
6
+ metadata.gz: 7d38ae673dcef87169985be6d6f2a189ae588f7d7a27df4e8a8e6944ec8d74394f6969797bd14100cdd230ac624ef164204067f00c6ae9a9544228ffb03111fe
7
+ data.tar.gz: a93e59d7997fc418488a6deddb22223bd021a287fa5c387b8588ff0f1a6908800256aba7fd77da236260021aa98a0ee58048edd98b0a00918def7dcbe2d3ccbd
@@ -0,0 +1 @@
1
+ *.gem
@@ -0,0 +1,64 @@
1
+ # Word conversion rules from Wiktionary
2
+
3
+ The aim of the project is to provide rules for converting English words and basic
4
+ information about the words' morphology. The project is based on the English
5
+ Wiktionary, thus the descriptions are pretty accurate and include most of the
6
+ exceptions (e.g. ox -> oxen).
7
+
8
+ ## Basic usage
9
+
10
+ Installation:
11
+
12
+ ```
13
+ $ gem install wiktionary
14
+ ```
15
+
16
+ So far only nouns are supported.
17
+
18
+ ```ruby
19
+ require 'wiktionary/noun'
20
+
21
+ nouns = Wiktionary::Noun.new
22
+ nouns.singularize("dogs") #=> ["dog"]
23
+ nouns.singularize("oxen") #=> ["ox"]
24
+ nouns.singularize("feet") #=> ["foot"]
25
+
26
+ nouns.singular?("dog") #=> true
27
+ nouns.singular?("dogs") #=> false
28
+
29
+
30
+ nouns.plural?("dog") #=> false
31
+ nouns.plural?("dogs") #=> true
32
+ ```
33
+
34
+ The `singularize` method returns an array since there might be more than one
35
+ base form of a given plural word.
36
+
37
+ ## Data
38
+
39
+ The `data` directory contains lists and mappings of noun forms (plural,
40
+ singular, countable, etc.). They are used to identify plural forms and
41
+ transform plural to singular. They are generated from a Wiktionary dump using
42
+ https://github.com/djstrong/nouns-with-plurals.
43
+
44
+ The following files are available:
45
+ * `noun.csv` - countable nouns
46
+ * `noun_countable_and_uncountable.csv` - e.g. http://en.wiktionary.org/wiki/beers
47
+ * `noun_uncountable.csv` - nouns that cannot be used freely with numbers or
48
+ the indefinite article, and which therefore take no plural form, e.g.
49
+ http://en.wiktionary.org/wiki/lycra
50
+ * `noun_usually_uncountable.csv` - e.g. http://en.wiktionary.org/wiki/information
51
+ * `noun_unknown.csv` - nouns with unknown or uncertain plural
52
+ * `noun_pluralia_tantum.csv` - nouns that do not have singular forms, e.g. http://en.wiktionary.org/wiki/scissors
53
+ * `noun_non_attested.csv` - nouns whose plural is not attested
54
+
55
+ The following files contain two columns (singular and
56
+ plural form):
57
+ * `noun.csv`
58
+ * `noun_countable_and_uncountable.csv`
59
+ * `noun_usually_uncountable.csv`
60
+
61
+ ## Credits
62
+
63
+ Krzysztof Wróbel (djstrong)
64
+ Aleksander Smywiński-Pohl (apohllo)
@@ -1,4 +1,6 @@
1
+ # encoding: utf-8
1
2
  require 'csv'
3
+ require 'unicode_utils'
2
4
 
3
5
  module Wiktionary
4
6
  class Noun
@@ -39,7 +41,16 @@ module Wiktionary
39
41
 
40
42
  # Returns list of possible singular forms of noun.
41
43
  def singularize(noun)
42
- @plural_to_singulars[noun]
44
+ first_capital = false
45
+ if noun =~ /^\p{Lu}/
46
+ noun = UnicodeUtils.downcase(noun[0]) + noun[1..-1]
47
+ first_capital = true
48
+ end
49
+ result = @plural_to_singulars[noun]
50
+ if first_capital
51
+ result.map!{|word| UnicodeUtils.upcase(word[0]) + word[1..-1] }
52
+ end
53
+ result
43
54
  end
44
55
 
45
56
  private
@@ -70,13 +81,16 @@ module Wiktionary
70
81
  end
71
82
 
72
83
  def add(singular,plurals)
84
+ singular = UnicodeUtils.downcase(singular)
85
+ plurals = plurals.map{|pl| UnicodeUtils.downcase(pl) }
73
86
  plurals.each do |plural|
74
- @plural_to_singulars[plural] << singular
75
- @singular_to_plurals[singular] << plural
87
+ @plural_to_singulars[plural] << singular unless @plural_to_singulars[plural].include?(singular)
88
+ @singular_to_plurals[singular] << plural unless @singular_to_plurals[singular].include?(plural)
76
89
  end
77
90
  end
78
91
 
79
92
  def add_uncountable(singular)
93
+ singular = UnicodeUtils.downcase(singular)
80
94
  @singular_to_plurals[singular] ||= []
81
95
  end
82
96
  end
@@ -25,5 +25,7 @@ RSpec.describe Wiktionary::Noun do
25
25
  expect(@nouns.singularize("politics")).to include "politics"
26
26
  expect(@nouns.singularize("oxen")).to include "ox"
27
27
  expect(@nouns.singularize("feet")).to include "foot"
28
+ expect(@nouns.singularize("Dogs")).to include "Dog"
29
+ expect(@nouns.singularize("Γ rays")).to include "Γ ray"
28
30
  end
29
31
  end
@@ -0,0 +1,25 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = "wiktionary"
3
+ s.version = "0.1.1"
4
+ s.date = "#{Time.now.strftime("%Y-%m-%d")}"
5
+ s.required_ruby_version = '>= 2.0.0'
6
+ s.authors = ['Krzysztof Wróbel','Aleksander Smywiński-Pohl']
7
+ s.email = ["djstrong@gmail.com","apohllo@o2.pl"]
8
+ s.homepage = "http://github.com/cycloped-io/wiktionary"
9
+ s.summary = "Word morphology and conversion based on Wiktionary"
10
+ s.description = "English words morphological description and basic conversion rules based on the English Wiktionary."
11
+ s.license = "http://opensource.org/licenses/MIT"
12
+
13
+ s.rubyforge_project = "wiktionary"
14
+ s.rdoc_options = ["--main", "Readme.md"]
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.require_path = "lib"
19
+
20
+ s.add_dependency("unicode_utils")
21
+
22
+ s.add_development_dependency("rspec", [">= 3.2.0","< 4.0.0"])
23
+ s.add_development_dependency("rake", [">= 10.4.0","< 11.0.0"])
24
+ end
25
+
metadata CHANGED
@@ -1,16 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wiktionary
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
- - Krzysztofsz Wróbel
7
+ - Krzysztof Wróbel
8
8
  - Aleksander Smywiński-Pohl
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-08-21 00:00:00.000000000 Z
12
+ date: 2015-08-22 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: unicode_utils
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ! '>='
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ! '>='
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
14
28
  - !ruby/object:Gem::Dependency
15
29
  name: rspec
16
30
  requirement: !ruby/object:Gem::Requirement
@@ -60,7 +74,9 @@ executables: []
60
74
  extensions: []
61
75
  extra_rdoc_files: []
62
76
  files:
77
+ - .gitignore
63
78
  - .rspec
79
+ - Readme.md
64
80
  - data/noun.csv
65
81
  - data/noun_countable_and_uncountable.csv
66
82
  - data/noun_non_attested.csv
@@ -72,6 +88,7 @@ files:
72
88
  - lib/wiktionary/noun.rb
73
89
  - spec/noun.rb
74
90
  - spec/spec_helper.rb
91
+ - wiktionary.gemspec
75
92
  homepage: http://github.com/cycloped-io/wiktionary
76
93
  licenses:
77
94
  - http://opensource.org/licenses/MIT