strsyntax 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
#!/usr/bin/env ruby
#
# Command-line front end for the strsyntax library.
#
# Usage: strsyntax [options] word
#
# Prints, for the given word, one of:
#   --syntax     the consonant/vowel group symbols   (String#syntax)
#   --parts      the substrings matching each group  (String#parts)
#   --structure  [symbol, substring] pairs           (String#structure, default)
#
# Exits with a non-zero status and a usage message when no word is supplied
# or an unknown option is given.

require 'optparse'
require 'strsyntax'

# :structure is the default when no mode switch is given.
options = {
  :command => :structure
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: strsyntax [options] word"
  opts.separator ""

  opts.on( "-y", "--syntax", "Splits string into syntax elements: [:cv,:vc,:vc]" ) do
    options[:command] = :syntax
  end

  opts.on( "-p", "--parts", "Splits string into syntax instances: ['ro','ut','er']" ) do
    options[:command] = :parts
  end

  opts.on( "-s", "--structure", "Splits string into syntax structure: [[:cv,'ro'],[:vc,'ut'],[:vc,'er']]" ) do
    options[:command] = :structure
  end

  opts.separator ""
end

begin
  # parse! strips the recognised switches from ARGV and returns what is left,
  # so the first remaining element is the word to analyse.
  str = parser.parse!( ARGV ).first
rescue OptionParser::ParseError => e
  abort( "#{e.message}\n\n#{parser}" )
end

# Without a word there is nothing to analyse; show the usage text instead of
# failing with a NoMethodError on nil.
abort( parser.to_s ) if str.nil?

# options[:command] can only hold one of the three symbols assigned above,
# each of which names a String method added by strsyntax.
puts str.send( options[:command] ).inspect
@@ -0,0 +1,2 @@
1
+ require 'strsyntax/strsyntax'
2
+ require 'strsyntax/stdlib_ext'
@@ -0,0 +1,53 @@
1
+ #
2
+ # Extend standard library classes with methods required by Ngrams
3
+ #
4
# Refuse to overwrite an existing String#syntax. The check must be made with
# method_defined? (instance methods); String.respond_to? would ask whether the
# String *class object* responds to the message and so could never detect a
# clash.
if String.method_defined?( :syntax ) || String.private_method_defined?( :syntax )
  raise "Cannot patch in String#syntax as it is already defined!"
end

class String
  # Returns an array containing the syntax of the string in terms of consonant and vowel
  # groups. The groups are CVC, VCV, CV, VC, C, and V. For example:
  #
  #   "groucho".syntax   => [:c,:cv,:vc,:cv]
  #   "harpo".syntax     => [:cvc,:cv]
  #   "chico".syntax     => [:c,:cv,:cv]
  #   "zeppo".syntax     => [:cvc,:cv]
  #   "teasdale".syntax  => [:cv,:vc,:cv,:cv]
  #
  # The work is delegated to StringSyntax::Parser.parse.
  def syntax
    StringSyntax::Parser.parse( self )
  end
end
21
+
22
# Refuse to overwrite an existing String#parts. method_defined? inspects
# instance methods; String.respond_to? would only consult the String class
# object itself and therefore never notice a clash.
if String.method_defined?( :parts ) || String.private_method_defined?( :parts )
  raise "Cannot patch in String#parts as it is already defined!"
end

class String
  # Return an array containing the constituent parts of the string as represented by
  # its syntax. For example:
  #
  #   "groucho".parts   => ["g","ro","uc","ho"]
  #   "harpo".parts     => ["har","po"]
  #   "chico".parts     => ["c","hi","co"]
  #   "zeppo".parts     => ["zep","po"]
  #   "teasdale".parts  => ["te","as","da","le"]
  #
  # The work is delegated to StringSyntax::Parser.split.
  def parts
    StringSyntax::Parser.split( self )
  end
end
39
+
40
# Refuse to overwrite an existing String#structure. method_defined? inspects
# instance methods; String.respond_to? would only consult the String class
# object itself and therefore never notice a clash.
if String.method_defined?( :structure ) || String.private_method_defined?( :structure )
  raise "Cannot patch in String#structure as it is already defined!"
end

class String
  # Returns an array pairing each syntax group of the string with the
  # substring it covers. Example:
  #
  #   "groucho".structure  => [[:c,"g"],[:cv,"ro"],[:vc,"uc"],[:cv,"ho"]]
  #
  # The string is parsed once and the resulting schema is handed to
  # StringSyntax::Parser.split, rather than parsing a second time to obtain
  # the parts. A nil parse result is treated as an empty schema so that the
  # method always returns an array.
  def structure
    schema = StringSyntax::Parser.parse( self ) || []
    schema.zip( StringSyntax::Parser.split( self, schema ) )
  end
end
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Can we decode any word into some combination of
4
+ # CV, VC, CVC, VCV
5
+ # ?
6
+ #
7
+
8
module StringSyntax
  # Decomposes a word into consonant (C) / vowel (V) groups.
  #
  # The recognised groups are CVC, VCV, CV, VC, C and V, represented by the
  # symbols :cvc, :vcv, :cv, :vc, :c and :v. Vowels are a, e, i, o, u; every
  # other ASCII letter, including y, counts as a consonant. Matching is
  # case-insensitive.
  #
  # Parsing stops at the first character that is neither a vowel nor a
  # consonant (digit, space, punctuation, ...); whatever precedes it is still
  # reported.
  class Parser
    # Templates are anchored with \A (start of string) rather than ^ (start of
    # any line) so that text following a newline can never be mistaken for
    # the head of the string. The /i flag lets the same templates be applied
    # to the caller's original, possibly mixed-case, string.
    C   = /\A([bcdfghjklmnpqrstvwxyz])/i
    V   = /\A([aeiou])/i
    CV  = /\A([bcdfghjklmnpqrstvwxyz][aeiou])/i
    CVC = /\A([bcdfghjklmnpqrstvwxyz][aeiou][bcdfghjklmnpqrstvwxyz])/i
    VC  = /\A([aeiou][bcdfghjklmnpqrstvwxyz])/i
    VCV = /\A([aeiou][bcdfghjklmnpqrstvwxyz][aeiou])/i

    # Template regexp => schema symbol.
    SCHEMA_TEMPLATES = {
      C   => :c,
      V   => :v,
      CV  => :cv,
      CVC => :cvc,
      VC  => :vc,
      VCV => :vcv
    }.freeze

    # Schema symbol => template regexp (the reverse lookup).
    TEMPLATES_BY_SCHEMA = SCHEMA_TEMPLATES.invert.freeze

    # Returns an array containing the components of the string in terms of
    # consonant and vowel groupings: CVC, VCV, CV, VC, C, V.
    #
    # Of the possible decompositions the one with the fewest stray :c / :v
    # elements is chosen; on a tie the decomposition that starts with the
    # longer group wins.
    #
    #   parse( "hello" )  => [:cvc, :cv]
    #   parse( "" )       => []
    #
    # Always returns an Array (empty when nothing at the head of +s+ can be
    # matched).
    def self.parse( s )
      best_structure( s, {} )
    end

    # Return the number of :c and :v components in the structure
    def self.score( s )
      s.count { |e| e == :c || e == :v }
    end

    # Splits +s+ into the substrings covered by each element of +structure+
    # (by default the result of parse( s )). The substrings keep the
    # original letter case.
    #
    #   split( "Hello" )  => ["Hel", "lo"]
    #
    # Raises RuntimeError for an unknown schema symbol and ArgumentError when
    # +structure+ does not fit +s+.
    def self.split( s, structure = parse( s ) )
      templates = structure.map { |schema| template_from_schema( schema ) }
      remaining = s
      templates.map do |template|
        match_data = template.match( remaining )
        unless match_data
          raise ArgumentError, "Structure #{structure.inspect} does not match #{s.inspect}"
        end
        remaining = match_data.post_match
        match_data[1]
      end
    end

    # Returns the schema symbol for one of the template constants, or nil if
    # +template+ is not a known template.
    def self.schema_from_template( template )
      SCHEMA_TEMPLATES[template]
    end

    # Returns the template regexp for a schema symbol.
    # Raises RuntimeError ("Unknown schema: ...") for anything else.
    def self.template_from_schema( schema )
      TEMPLATES_BY_SCHEMA.fetch( schema ) { raise "Unknown schema: #{schema}" }
    end

    # Returns the schema symbol of the longest group found at the start of
    # +s+, or nil when +s+ does not start with a vowel or consonant.
    def self.schema_from_string( s )
      case s
      when CVC then :cvc
      when CV  then :cv
      when VCV then :vcv
      when VC  then :vc
      when C   then :c
      when V   then :v
      end
    end

    # Legacy helper: the schema for +template+ followed by the parse of
    # +rest+. +match+ is accepted but unused. It is no longer called by
    # parse, and stays publicly callable because the bare `private` that
    # used to precede it never applied to `def self.` methods.
    def self.parse_subpart( template, match, rest )
      [ schema_from_template( template ), *parse( rest ) ].compact
    end

    # Memoised recursive worker behind parse.
    #
    # At each position the longest matching group and its one-shorter
    # fallback are tried (CVC/CV, CV/C, VCV/VC, VC/V), and the candidate with
    # the lowest score is kept. min_by returns the first minimum, so the
    # longer group wins ties. +memo+ caches the best structure per suffix,
    # which keeps the work polynomial; without it every suffix would be
    # re-parsed once per path that reaches it.
    def self.best_structure( s, memo )
      return memo[s] if memo.key?( s )

      templates = case s
                  when CVC then [ CVC, CV ]
                  when CV  then [ CV, C ]
                  when C   then [ C ]
                  when VCV then [ VCV, VC ]
                  when VC  then [ VC, V ]
                  when V   then [ V ]
                  else []
                  end

      candidates = templates.map do |template|
        # Every template listed for a branch is a prefix-compatible variant
        # of the one that matched, so match() cannot fail here.
        rest = template.match( s ).post_match
        [ schema_from_template( template ), *best_structure( rest, memo ) ]
      end

      memo[s] = candidates.min_by { |candidate| score( candidate ) } || []
    end
    private_class_method :best_structure
  end
end
112
+
113
# Demo: when this file is executed directly (rather than required), print the
# parsed syntax of a handful of sample words, one per line.
if $0 == __FILE__
  WORDS = %w( hello google amazon ookles paoga linguistics antidisestablishmentarianism )

  WORDS.each do |sample|
    parsed = StringSyntax::Parser.parse( sample )
    puts [ sample, parsed.inspect ].join( " -> " )
  end
end
@@ -0,0 +1,30 @@
1
+ $:<< File.join( File.dirname( __FILE__ ), '..', 'lib' )
2
+
3
+ require 'strsyntax'
4
+ require 'test/unit'
5
+
6
# Unit tests for the String#syntax, String#parts and String#structure
# extensions. Expectations are listed as [word, expected] pairs and checked
# in order.
class TestParser < Test::Unit::TestCase

  # Each word must decompose into the listed consonant/vowel group symbols.
  def test_syntax
    [
      [ "hello",       [:cvc,:cv] ],
      [ "amplitude",   [:vc,:c,:cvc,:vcv] ],
      [ "transmitter", [:c,:cvc,:c,:cvc,:cvc] ],
      [ "google",      [:cv,:vc,:cv] ],
      [ "aluminium",   [:vcv,:cv,:cv,:vc] ],
      [ "teasdale",    [:cv,:vc,:cv,:cv] ]
    ].each do |word, expected|
      assert_equal expected, word.syntax
    end
  end

  # Each word must split into the substrings covered by its syntax groups.
  def test_parts
    [
      [ "hello",       ["hel","lo"] ],
      [ "amplitude",   ["am","p","lit","ude"] ],
      [ "transmitter", ["t","ran","s","mit","ter"] ],
      [ "google",      ["go","og","le"] ],
      [ "aluminium",   ["alu","mi", "ni","um"] ],
      [ "teasdale",    ["te","as","da","le"] ]
    ].each do |word, expected|
      assert_equal expected, word.parts
    end
  end

  # The structure pairs every syntax symbol with the substring it covers.
  def test_structure
    expected = [[:c,"g"],[:cv,"ro"],[:vc,"uc"],[:cv,"ho"]]
    assert_equal expected, "groucho".structure
  end

end
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: !int:Fixnum 1
4
+ name: strsyntax
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.2
7
+ date: 2006-07-28 00:00:00 +01:00
8
+ summary: A library to return the syntax/structure of a word in terms of consonant and vowel groups.
9
+ require_paths:
10
+ - lib
11
+ email: self@mattmower.com
12
+ homepage: http://rubyforge.org/projects/rubymatt/
13
+ rubyforge_project: rubymatt
14
+ description:
15
+ autorequire: strsyntax
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Matt Mower
31
+ files:
32
+ - lib/strsyntax.rb
33
+ - lib/strsyntax/stdlib_ext.rb
34
+ - lib/strsyntax/strsyntax.rb
35
+ - test/test_parser.rb
36
+ - bin/strsyntax
37
+ test_files: []
38
+
39
+ rdoc_options: []
40
+
41
+ extra_rdoc_files: []
42
+
43
+ executables:
44
+ - strsyntax
45
+ extensions: []
46
+
47
+ requirements: []
48
+
49
+ dependencies: []
50
+