wiktionary 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
- metadata.gz: 8ab8f85d72bf98e429758c1fa47c139028fc8a14
4
- data.tar.gz: 9c926d3537990a2034ad24203cbdbe09f8dbb961
3
+ metadata.gz: 091eb3a349dd479feed4637a5beaef0900cea651
4
+ data.tar.gz: ca129fad7fefe18ee4d823d8ae0d94f5e5258536
5
5
  SHA512:
6
- metadata.gz: e841cd75cbe30ba85c2f2824a95bbee790363595284e79d0b8072d079f17b01fa20e0784965420225fde74c85e6baeab9282a680fac5cec0e2f217b8e27c69cc
7
- data.tar.gz: f94050c1d8f61a2ec7fd4dac52fbbaf72386a243a9aaf1365dd208718eba484653de839e3cb34b2dec51ad3aeebe7cde861ee31fa889786423c527a9cd569666
6
+ metadata.gz: 7d38ae673dcef87169985be6d6f2a189ae588f7d7a27df4e8a8e6944ec8d74394f6969797bd14100cdd230ac624ef164204067f00c6ae9a9544228ffb03111fe
7
+ data.tar.gz: a93e59d7997fc418488a6deddb22223bd021a287fa5c387b8588ff0f1a6908800256aba7fd77da236260021aa98a0ee58048edd98b0a00918def7dcbe2d3ccbd
@@ -0,0 +1 @@
1
+ *.gem
@@ -0,0 +1,64 @@
1
+ # Word conversion rules from Wiktionary
2
+
3
+ The aim of the project is to provide rules for converting English words and basic
4
+ information about the words' morphology. The project is based on the English
5
+ Wiktionary thus the descriptions are pretty accurate and include most of the
6
+ exceptions (e.g. ox -> oxen).
7
+
8
+ ## Basic usage
9
+
10
+ Installation:
11
+
12
+ ```
13
+ $ gem install wiktionary
14
+ ```
15
+
16
+ So far only nouns are supported.
17
+
18
+ ```ruby
19
+ require 'wiktionary/noun'
20
+
21
+ nouns = Wiktionary::Noun.new
22
+ nouns.singularize("dogs") #=> ["dog"]
23
+ nouns.singularize("oxen") #=> ["ox"]
24
+ nouns.singularize("feet") #=> ["foot"]
25
+
26
+ nouns.singular?("dog") #=> true
27
+ nouns.singular?("dogs") #=> false
28
+
29
+
30
+ nouns.plural?("dog") #=> false
31
+ nouns.plural?("dogs") #=> true
32
+ ```
33
+
34
+ The `singularize` method returns an array since there might be more than one
35
+ base form of a given plural word.
36
+
37
+ ## Data
38
+
39
+ The `data` directory contains lists and mappings of noun forms (plural,
40
+ singular, countable, etc.). They are used to identify plural forms and
41
+ transform plural to singular. Generated from Wiktionary dump using
42
+ https://github.com/djstrong/nouns-with-plurals.
43
+
44
+ The following files are available:
45
+ * `noun.csv` - countable nouns
46
+ * `noun_countable_and_uncountable.csv` - e.g. http://en.wiktionary.org/wiki/beers
47
+ * `noun_uncountable.csv` - nouns that cannot be used freely with numbers or
48
+ the indefinite article, and which therefore take no plural form, e.g.
49
+ http://en.wiktionary.org/wiki/lycra
50
+ * `noun_usually_uncountable.csv` - e.g. http://en.wiktionary.org/wiki/information
51
+ * `noun_unknown.csv` - nouns with unknown or uncertain plural
52
+ * `noun_pluralia_tantum.csv` - nouns that do not have singular forms, e.g. http://en.wiktionary.org/wiki/scissors
53
+ * `noun_non_attested.csv` - nouns with plural not attested
54
+
55
+ The following files contain two columns (singular and
56
+ plural form):
57
+ * `noun.csv`
58
+ * `noun_countable_and_uncountable.csv`
59
+ * `noun_usually_uncountable.csv`
60
+
61
+ ## Credits
62
+
63
+ Krzysztof Wróbel (djstrong)
64
+ Aleksander Smywiński-Pohl (apohllo)
@@ -1,4 +1,6 @@
1
+ # encoding: utf-8
1
2
  require 'csv'
3
+ require 'unicode_utils'
2
4
 
3
5
  module Wiktionary
4
6
  class Noun
@@ -39,7 +41,16 @@ module Wiktionary
39
41
 
40
42
  # Returns list of possible singular forms of noun.
41
43
  def singularize(noun)
42
- @plural_to_singulars[noun]
44
+ first_capital = false
45
+ if noun =~ /^\p{Lu}/
46
+ noun = UnicodeUtils.downcase(noun[0]) + noun[1..-1]
47
+ first_capital = true
48
+ end
49
+ result = @plural_to_singulars[noun]
50
+ if first_capital
51
+ result.map!{|word| UnicodeUtils.upcase(word[0]) + word[1..-1] }
52
+ end
53
+ result
43
54
  end
44
55
 
45
56
  private
@@ -70,13 +81,16 @@ module Wiktionary
70
81
  end
71
82
 
72
83
  def add(singular,plurals)
84
+ singular = UnicodeUtils.downcase(singular)
85
+ plurals = plurals.map{|pl| UnicodeUtils.downcase(pl) }
73
86
  plurals.each do |plural|
74
- @plural_to_singulars[plural] << singular
75
- @singular_to_plurals[singular] << plural
87
+ @plural_to_singulars[plural] << singular unless @plural_to_singulars[plural].include?(singular)
88
+ @singular_to_plurals[singular] << plural unless @singular_to_plurals[singular].include?(plural)
76
89
  end
77
90
  end
78
91
 
79
92
  def add_uncountable(singular)
93
+ singular = UnicodeUtils.downcase(singular)
80
94
  @singular_to_plurals[singular] ||= []
81
95
  end
82
96
  end
@@ -25,5 +25,7 @@ RSpec.describe Wiktionary::Noun do
25
25
  expect(@nouns.singularize("politics")).to include "politics"
26
26
  expect(@nouns.singularize("oxen")).to include "ox"
27
27
  expect(@nouns.singularize("feet")).to include "foot"
28
+ expect(@nouns.singularize("Dogs")).to include "Dog"
29
+ expect(@nouns.singularize("Γ rays")).to include "Γ ray"
28
30
  end
29
31
  end
@@ -0,0 +1,25 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = "wiktionary"
3
+ s.version = "0.1.1"
4
+ s.date = "#{Time.now.strftime("%Y-%m-%d")}"
5
+ s.required_ruby_version = '>= 2.0.0'
6
+ s.authors = ['Krzysztof Wróbel','Aleksander Smywiński-Pohl']
7
+ s.email = ["djstrong@gmail.com","apohllo@o2.pl"]
8
+ s.homepage = "http://github.com/cycloped-io/wiktionary"
9
+ s.summary = "Word morphology and conversion based on Wiktionary"
10
+ s.description = "English words morphological description and basic conversion rules based on the English Wiktionary."
11
+ s.license = "http://opensource.org/licenses/MIT"
12
+
13
+ s.rubyforge_project = "wiktionary"
14
+ s.rdoc_options = ["--main", "Readme.md"]
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.require_path = "lib"
19
+
20
+ s.add_dependency("unicode_utils")
21
+
22
+ s.add_development_dependency("rspec", [">= 3.2.0","< 4.0.0"])
23
+ s.add_development_dependency("rake", [">= 10.4.0","< 11.0.0"])
24
+ end
25
+
metadata CHANGED
@@ -1,16 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wiktionary
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
- - Krzysztofsz Wróbel
7
+ - Krzysztof Wróbel
8
8
  - Aleksander Smywiński-Pohl
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-08-21 00:00:00.000000000 Z
12
+ date: 2015-08-22 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: unicode_utils
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ! '>='
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ! '>='
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
14
28
  - !ruby/object:Gem::Dependency
15
29
  name: rspec
16
30
  requirement: !ruby/object:Gem::Requirement
@@ -60,7 +74,9 @@ executables: []
60
74
  extensions: []
61
75
  extra_rdoc_files: []
62
76
  files:
77
+ - .gitignore
63
78
  - .rspec
79
+ - Readme.md
64
80
  - data/noun.csv
65
81
  - data/noun_countable_and_uncountable.csv
66
82
  - data/noun_non_attested.csv
@@ -72,6 +88,7 @@ files:
72
88
  - lib/wiktionary/noun.rb
73
89
  - spec/noun.rb
74
90
  - spec/spec_helper.rb
91
+ - wiktionary.gemspec
75
92
  homepage: http://github.com/cycloped-io/wiktionary
76
93
  licenses:
77
94
  - http://opensource.org/licenses/MIT