strsyntax 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'strsyntax'
5
+
6
# Command-line front end for the strsyntax library.
#
# Reads a single word from the command line and prints its syntax, its parts
# or (the default) its combined structure, depending on the option given.

# The command to run; overridden by -y / -p / -s.
options = {
  :command => :structure
}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: strsyntax [options] word"
  opts.separator ""

  opts.on( "-y", "--syntax", "Splits string into syntax elements: [:cv,:vc,:vc]" ) do
    options[:command] = :syntax
  end

  opts.on( "-p", "--parts", "Splits string into syntax instances: ['ro','ut','er']" ) do
    options[:command] = :parts
  end

  opts.on( "-s", "--structure", "Splits string into syntax structure: [[:cv,'ro'],[:vc,'ut'],[:vc,'er']]" ) do
    options[:command] = :structure
  end

  opts.separator ""
end

# parse! strips the recognised switches out of ARGV, leaving only the word.
# An unknown switch is reported with the usage text instead of a stack trace.
begin
  parser.parse!( ARGV )
rescue OptionParser::ParseError => e
  abort "#{e.message}\n\n#{parser}"
end

word = ARGV.first

# Without a word there is nothing to analyse: show the usage and exit non-zero.
abort parser.to_s if word.nil?

case options[:command]
when :syntax
  puts word.syntax.inspect
when :parts
  puts word.parts.inspect
when :structure
  puts word.structure.inspect
end
@@ -0,0 +1,2 @@
1
+ require 'strsyntax/strsyntax'
2
+ require 'strsyntax/stdlib_ext'
@@ -0,0 +1,53 @@
1
+ #
2
+ # Extend standard library classes with methods required by Ngrams
3
+ #
4
# Each patch is guarded so that an existing String instance method is never
# silently overwritten. The check uses method_defined? (instance methods)
# rather than respond_to?, which would only look at methods of the String
# class object itself and so could never detect a clash.

if String.method_defined?( :syntax ) || String.private_method_defined?( :syntax )
  raise "Cannot patch in String#syntax as it is already defined!"
end

class String
  # Returns an array containing the syntax of the string in terms of consonant and vowel
  # groups. The groups are CVC, VCV, CV, VC, C, and V. For example:
  #
  # "groucho".syntax  => [:c,:cv,:vc,:cv]
  # "harpo".syntax    => [:cvc,:cv]
  # "chico".syntax    => [:c,:cv,:cv]
  # "zeppo".syntax    => [:cvc,:cv]
  # "teasdale".syntax => [:cv,:vc,:cv,:cv]
  def syntax
    StringSyntax::Parser.parse( self )
  end
end

if String.method_defined?( :parts ) || String.private_method_defined?( :parts )
  raise "Cannot patch in String#parts as it is already defined!"
end

class String
  # Return an array containing the constituent parts of the string as represented by
  # its syntax. For example:
  #
  # "groucho".parts  => ["g","ro","uc","ho"]
  # "harpo".parts    => ["har","po"]
  # "chico".parts    => ["c","hi","co"]
  # "zeppo".parts    => ["zep","po"]
  # "teasdale".parts => ["te","as","da","le"]
  def parts
    StringSyntax::Parser.split( self )
  end
end

if String.method_defined?( :structure ) || String.private_method_defined?( :structure )
  raise "Cannot patch in String#structure as it is already defined!"
end

class String
  # Returns an array pairing each syntax element with the part of the string
  # it describes. See #syntax and #parts for more information. Example:
  #
  # "groucho".structure => [[:c,"g"],[:cv,"ro"],[:vc,"uc"],[:cv,"ho"]]
  #
  def structure
    self.syntax.zip( self.parts )
  end
end
@@ -0,0 +1,119 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Can we decode any word into some combination of
4
+ # CV, VC, CVC, VCV
5
+ # ?
6
+ #
7
+
8
module StringSyntax
  # Decomposes a word into consonant/vowel groups (CVC, VCV, CV, VC, C, V).
  #
  # Only the ASCII letters a-z are recognised ("y" counts as a consonant) and
  # matching ignores case. Parsing stops at the first character that is not
  # such a letter, so anything after it is left out of the result.
  class Parser
    # Each template matches one group at the very start of the string
    # (\A, not ^, so that a later line of a multi-line string is never
    # mistaken for the start) and captures the matched text as group 1.
    C   = /\A([bcdfghjklmnpqrstvwxyz])/i
    V   = /\A([aeiou])/i
    CV  = /\A([bcdfghjklmnpqrstvwxyz][aeiou])/i
    CVC = /\A([bcdfghjklmnpqrstvwxyz][aeiou][bcdfghjklmnpqrstvwxyz])/i
    VC  = /\A([aeiou][bcdfghjklmnpqrstvwxyz])/i
    VCV = /\A([aeiou][bcdfghjklmnpqrstvwxyz][aeiou])/i

    # Template => schema symbol.
    SCHEMA_TEMPLATES = {
      C   => :c,
      V   => :v,
      CV  => :cv,
      CVC => :cvc,
      VC  => :vc,
      VCV => :vcv
    }

    # Schema symbol => template (the reverse of SCHEMA_TEMPLATES).
    TEMPLATES_BY_SCHEMA = SCHEMA_TEMPLATES.invert

    # Returns an array containing the components of the string in terms of consonant
    # and vowel groupings: CVC, VCV, CV, VC, C, V.
    #
    # At each position the longest group that matches and its next shorter
    # alternative are both tried (recursively on the remainder), and the
    # decomposition with the fewest stray :c / :v elements is kept.
    #
    # Returns an empty array when the string does not start with a letter
    # (including the empty string).
    def self.parse( s )
      s = s.downcase

      candidates = case s
        when CVC
          [ parse_subpart( CVC, s[0,3], s[(3..-1)] ), parse_subpart( CV, s[0,2], s[(2..-1)] ) ]
        when CV
          [ parse_subpart( CV, s[0,2], s[(2..-1)] ), parse_subpart( C, s[0,1], s[(1..-1)] ) ]
        when C
          [ parse_subpart( C, s[0,1], s[(1..-1)] ) ]
        when VCV
          [ parse_subpart( VCV, s[0,3], s[(3..-1)] ), parse_subpart( VC, s[0,2], s[(2..-1)] ) ]
        when VC
          [ parse_subpart( VC, s[0,2], s[(2..-1)] ), parse_subpart( V, s[0,1], s[(1..-1)] ) ]
        when V
          [ parse_subpart( V, s[0,1], s[(1..-1)] ) ]
        else
          []
        end

      # Keep the candidate containing the least number of stray :c and :v.
      # With no candidates there is nothing to parse, so answer [] rather than nil.
      candidates.sort_by { |candidate| score( candidate ) }.first || []
    end

    # Return the number of :c and :v components in the structure
    def self.score( s )
      s.inject( 0 ) { |sum, e| e == :c || e == :v ? sum + 1 : sum }
    end

    # Cuts +s+ into the pieces described by +structure+ (by default the
    # result of parsing +s+), returning them as an array of strings. The
    # pieces keep the letter case of +s+.
    def self.split( s, structure = parse( s ) )
      templates = structure.map { |schema| template_from_schema( schema ) }
      templates.map do |template|
        match_data = template.match( s )
        # Continue with whatever follows the piece just consumed.
        s = match_data.post_match
        match_data[1]
      end
    end

    # Returns the schema symbol for a template, or nil if it is not one of ours.
    def self.schema_from_template( template )
      SCHEMA_TEMPLATES[template]
    end

    # Returns the template for a schema symbol; raises for an unknown schema.
    def self.template_from_schema( schema )
      TEMPLATES_BY_SCHEMA[schema] || raise( "Unknown schema: #{schema}" )
    end

    # Returns the schema symbol of the group at the start of +s+, preferring
    # the longer groups, or nil when +s+ does not start with a letter.
    def self.schema_from_string( s )
      case s
      when CVC
        :cvc
      when CV
        :cv
      when VCV
        :vcv
      when VC
        :vc
      when C
        :c
      when V
        :v
      end
    end

    # Internal helper for parse: the schema for +template+ followed by the
    # parse of +rest+. (+match+ is accepted but not used.)
    #
    # A bare `private` has no effect on methods defined with `def self.`, so
    # this method is, as it always has been, publicly callable.
    def self.parse_subpart( template, match, rest )
      [ schema_from_template( template ), *parse( rest ) ].compact
    end
  end
end
112
+
113
# When this file is run directly (rather than required), print the parsed
# syntax of a handful of sample words, one per line.
if $0 == __FILE__
  WORDS = %w( hello google amazon ookles paoga linguistics antidisestablishmentarianism )

  WORDS.each { |sample| puts "#{sample} -> #{ StringSyntax::Parser.parse( sample ).inspect}" }
end
@@ -0,0 +1,30 @@
1
+ $:<< File.join( File.dirname( __FILE__ ), '..', 'lib' )
2
+
3
+ require 'strsyntax'
4
+ require 'test/unit'
5
+
6
# Checks String#syntax, String#parts and String#structure against known words.
class TestParser < Test::Unit::TestCase

  # Each word must parse into the expected list of group symbols.
  def test_syntax
    [
      [ "hello",       [:cvc,:cv] ],
      [ "amplitude",   [:vc,:c,:cvc,:vcv] ],
      [ "transmitter", [:c,:cvc,:c,:cvc,:cvc] ],
      [ "google",      [:cv,:vc,:cv] ],
      [ "aluminium",   [:vcv,:cv,:cv,:vc] ],
      [ "teasdale",    [:cv,:vc,:cv,:cv] ]
    ].each do |word, expected|
      assert_equal expected, word.syntax
    end
  end

  # Each word must be cut into the expected list of substrings.
  def test_parts
    [
      [ "hello",       ["hel","lo"] ],
      [ "amplitude",   ["am","p","lit","ude"] ],
      [ "transmitter", ["t","ran","s","mit","ter"] ],
      [ "google",      ["go","og","le"] ],
      [ "aluminium",   ["alu","mi", "ni","um"] ],
      [ "teasdale",    ["te","as","da","le"] ]
    ].each do |word, expected|
      assert_equal expected, word.parts
    end
  end

  # The structure pairs every group symbol with its substring.
  def test_structure
    expected = [[:c,"g"],[:cv,"ro"],[:vc,"uc"],[:cv,"ho"]]
    assert_equal expected, "groucho".structure
  end

end
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: !int:Fixnum 1
4
+ name: strsyntax
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.2
7
+ date: 2006-07-28 00:00:00 +01:00
8
+ summary: A library to return the syntax/structure of a word in terms of consonant and vowel groups.
9
+ require_paths:
10
+ - lib
11
+ email: self@mattmower.com
12
+ homepage: http://rubyforge.org/projects/rubymatt/
13
+ rubyforge_project: rubymatt
14
+ description:
15
+ autorequire: strsyntax
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Matt Mower
31
+ files:
32
+ - lib/strsyntax.rb
33
+ - lib/strsyntax/stdlib_ext.rb
34
+ - lib/strsyntax/strsyntax.rb
35
+ - test/test_parser.rb
36
+ - bin/strsyntax
37
+ test_files: []
38
+
39
+ rdoc_options: []
40
+
41
+ extra_rdoc_files: []
42
+
43
+ executables:
44
+ - strsyntax
45
+ extensions: []
46
+
47
+ requirements: []
48
+
49
+ dependencies: []
50
+