ai4r 1.5 → 1.6
Sign up to get free protection for your applications and to get access to all the features.
- data/examples/clusterers/simple_website_clustering.rb +47 -0
- data/lib/ai4r.rb +7 -0
- data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
- data/lib/ai4r/clusterers/average_linkage.rb +22 -23
- data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
- data/lib/ai4r/clusterers/complete_linkage.rb +17 -12
- data/lib/ai4r/clusterers/diana.rb +139 -0
- data/lib/ai4r/clusterers/median_linkage.rb +61 -0
- data/lib/ai4r/clusterers/single_linkage.rb +57 -42
- data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
- data/lib/ai4r/data/constants.rb +18 -0
- data/lib/ai4r/data/data_set.rb +5 -3
- data/lib/ai4r/data/proximity.rb +18 -0
- data/test/clusterers/average_linkage_test.rb +14 -11
- data/test/clusterers/bisecting_k_means_test.rb +9 -0
- data/test/clusterers/centroid_linkage_test.rb +50 -0
- data/test/clusterers/complete_linkage_test.rb +14 -5
- data/test/clusterers/diana_test.rb +69 -0
- data/test/clusterers/k_means_test.rb +9 -0
- data/test/clusterers/median_linkage_test.rb +50 -0
- data/test/clusterers/single_linkage_test.rb +15 -6
- data/test/clusterers/ward_linkage_test.rb +50 -0
- data/test/clusterers/weighted_average_linkage_test.rb +50 -0
- data/test/data/data_set_test.rb +14 -0
- data/test/data/proximity_test.rb +10 -0
- metadata +87 -298
- data/site/build/site/en/broken-links.xml +0 -2
- data/site/build/site/en/build/tmp/build-info.xml +0 -5
- data/site/build/site/en/build/tmp/plugins-1.xml +0 -212
- data/site/build/site/en/build/tmp/plugins-2.xml +0 -252
- data/site/build/site/en/build/tmp/projfilters.properties +0 -41
- data/site/build/site/en/downloads.html +0 -200
- data/site/build/site/en/downloads.pdf +0 -151
- data/site/build/site/en/geneticAlgorithms.html +0 -591
- data/site/build/site/en/geneticAlgorithms.pdf +0 -934
- data/site/build/site/en/images/ai4r-logo.png +0 -0
- data/site/build/site/en/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/images/c.png +0 -0
- data/site/build/site/en/images/c_wbn.png +0 -0
- data/site/build/site/en/images/c_wn.png +0 -0
- data/site/build/site/en/images/ero.gif +0 -0
- data/site/build/site/en/images/europe2.png +0 -0
- data/site/build/site/en/images/europe3.png +0 -0
- data/site/build/site/en/images/fitness.png +0 -0
- data/site/build/site/en/images/genetic_algorithms_example.png +0 -0
- data/site/build/site/en/images/instruction_arrow.png +0 -0
- data/site/build/site/en/images/jadeferret.png +0 -0
- data/site/build/site/en/images/my_email.png +0 -0
- data/site/build/site/en/images/neural_network_example.png +0 -0
- data/site/build/site/en/images/rubyforge.png +0 -0
- data/site/build/site/en/images/s.png +0 -0
- data/site/build/site/en/images/s_wbn.png +0 -0
- data/site/build/site/en/images/s_wn.png +0 -0
- data/site/build/site/en/images/sigmoid.png +0 -0
- data/site/build/site/en/images/t.png +0 -0
- data/site/build/site/en/images/t_wbn.png +0 -0
- data/site/build/site/en/images/t_wn.png +0 -0
- data/site/build/site/en/index.html +0 -390
- data/site/build/site/en/index.pdf +0 -657
- data/site/build/site/en/linkmap.html +0 -261
- data/site/build/site/en/linkmap.pdf +0 -94
- data/site/build/site/en/locationmap.xml +0 -72
- data/site/build/site/en/machineLearning.html +0 -340
- data/site/build/site/en/machineLearning.pdf +0 -337
- data/site/build/site/en/neuralNetworks.html +0 -521
- data/site/build/site/en/neuralNetworks.pdf +0 -671
- data/site/build/site/en/skin/CommonMessages_de.xml +0 -23
- data/site/build/site/en/skin/CommonMessages_en_US.xml +0 -23
- data/site/build/site/en/skin/CommonMessages_es.xml +0 -23
- data/site/build/site/en/skin/CommonMessages_fr.xml +0 -23
- data/site/build/site/en/skin/basic.css +0 -166
- data/site/build/site/en/skin/breadcrumbs-optimized.js +0 -90
- data/site/build/site/en/skin/breadcrumbs.js +0 -237
- data/site/build/site/en/skin/fontsize.js +0 -166
- data/site/build/site/en/skin/getBlank.js +0 -40
- data/site/build/site/en/skin/getMenu.js +0 -45
- data/site/build/site/en/skin/images/README.txt +0 -1
- data/site/build/site/en/skin/images/add.jpg +0 -0
- data/site/build/site/en/skin/images/built-with-forrest-button.png +0 -0
- data/site/build/site/en/skin/images/chapter.gif +0 -0
- data/site/build/site/en/skin/images/chapter_open.gif +0 -0
- data/site/build/site/en/skin/images/current.gif +0 -0
- data/site/build/site/en/skin/images/error.png +0 -0
- data/site/build/site/en/skin/images/external-link.gif +0 -0
- data/site/build/site/en/skin/images/fix.jpg +0 -0
- data/site/build/site/en/skin/images/forrest-credit-logo.png +0 -0
- data/site/build/site/en/skin/images/hack.jpg +0 -0
- data/site/build/site/en/skin/images/header_white_line.gif +0 -0
- data/site/build/site/en/skin/images/info.png +0 -0
- data/site/build/site/en/skin/images/instruction_arrow.png +0 -0
- data/site/build/site/en/skin/images/label.gif +0 -0
- data/site/build/site/en/skin/images/page.gif +0 -0
- data/site/build/site/en/skin/images/pdfdoc.gif +0 -0
- data/site/build/site/en/skin/images/poddoc.png +0 -0
- data/site/build/site/en/skin/images/printer.gif +0 -0
- data/site/build/site/en/skin/images/rc-b-l-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-15-1body-2menu-3menu.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png +0 -0
- data/site/build/site/en/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png +0 -0
- data/site/build/site/en/skin/images/remove.jpg +0 -0
- data/site/build/site/en/skin/images/rss.png +0 -0
- data/site/build/site/en/skin/images/spacer.gif +0 -0
- data/site/build/site/en/skin/images/success.png +0 -0
- data/site/build/site/en/skin/images/txtdoc.png +0 -0
- data/site/build/site/en/skin/images/update.jpg +0 -0
- data/site/build/site/en/skin/images/valid-html401.png +0 -0
- data/site/build/site/en/skin/images/vcss.png +0 -0
- data/site/build/site/en/skin/images/warning.png +0 -0
- data/site/build/site/en/skin/images/xmldoc.gif +0 -0
- data/site/build/site/en/skin/menu.js +0 -48
- data/site/build/site/en/skin/note.txt +0 -50
- data/site/build/site/en/skin/print.css +0 -54
- data/site/build/site/en/skin/profile.css +0 -163
- data/site/build/site/en/skin/prototype.js +0 -1257
- data/site/build/site/en/skin/screen.css +0 -587
- data/site/build/site/en/sourceCode.html +0 -244
- data/site/build/site/en/sourceCode.pdf +0 -278
- data/site/build/site/en/svn.html +0 -244
- data/site/build/site/en/svn.pdf +0 -278
- data/site/build/tmp/brokenlinks.xml +0 -2
- data/site/build/tmp/build-info.xml +0 -5
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
- data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
- data/site/build/tmp/input.xmap +0 -32
- data/site/build/tmp/internal.xmap +0 -32
- data/site/build/tmp/locationmap.xml +0 -29
- data/site/build/tmp/output.xmap +0 -38
- data/site/build/tmp/pluginlist2fetchbuild.xml +0 -144
- data/site/build/tmp/plugins-1.xml +0 -201
- data/site/build/tmp/plugins-2.xml +0 -401
- data/site/build/tmp/projfilters.properties +0 -41
- data/site/build/tmp/resources.xmap +0 -32
- data/site/build/webapp/WEB-INF/logs/access.log +0 -0
- data/site/build/webapp/WEB-INF/logs/core.log +0 -775
- data/site/build/webapp/WEB-INF/logs/debug.log +0 -0
- data/site/build/webapp/WEB-INF/logs/error.log +0 -213
- data/site/build/webapp/WEB-INF/logs/flow.log +0 -0
- data/site/build/webapp/WEB-INF/logs/idgen.log +0 -0
- data/site/build/webapp/WEB-INF/logs/linkrewriter.log +0 -0
- data/site/build/webapp/WEB-INF/logs/locationmap.log +0 -0
- data/site/build/webapp/WEB-INF/logs/sitemap.log +0 -0
- data/site/build/webapp/WEB-INF/logs/xmlform.log +0 -0
- data/site/forrest.properties +0 -152
- data/site/forrest.properties.dispatcher.properties +0 -25
- data/site/forrest.properties.xml +0 -29
- data/site/src/documentation/README.txt +0 -7
- data/site/src/documentation/classes/CatalogManager.properties +0 -62
- data/site/src/documentation/content/locationmap.xml +0 -72
- data/site/src/documentation/content/xdocs/downloads.html +0 -9
- data/site/src/documentation/content/xdocs/geneticAlgorithms.xml +0 -294
- data/site/src/documentation/content/xdocs/index.xml +0 -129
- data/site/src/documentation/content/xdocs/machineLearning.xml +0 -131
- data/site/src/documentation/content/xdocs/neuralNetworks.xml +0 -270
- data/site/src/documentation/content/xdocs/site.xml +0 -54
- data/site/src/documentation/content/xdocs/sourceCode.xml +0 -43
- data/site/src/documentation/content/xdocs/tabs.xml +0 -35
- data/site/src/documentation/resources/images/ai4r-logo.png +0 -0
- data/site/src/documentation/resources/images/c.png +0 -0
- data/site/src/documentation/resources/images/c_wbn.png +0 -0
- data/site/src/documentation/resources/images/c_wn.png +0 -0
- data/site/src/documentation/resources/images/ellipse-2.svg +0 -30
- data/site/src/documentation/resources/images/ero.gif +0 -0
- data/site/src/documentation/resources/images/europe2.png +0 -0
- data/site/src/documentation/resources/images/europe3.png +0 -0
- data/site/src/documentation/resources/images/fitness.png +0 -0
- data/site/src/documentation/resources/images/genetic_algorithms_example.png +0 -0
- data/site/src/documentation/resources/images/icon-a.png +0 -0
- data/site/src/documentation/resources/images/icon-b.png +0 -0
- data/site/src/documentation/resources/images/icon.png +0 -0
- data/site/src/documentation/resources/images/jadeferret.png +0 -0
- data/site/src/documentation/resources/images/my_email.png +0 -0
- data/site/src/documentation/resources/images/neural_network_example.png +0 -0
- data/site/src/documentation/resources/images/project-logo.png +0 -0
- data/site/src/documentation/resources/images/rubyforge.png +0 -0
- data/site/src/documentation/resources/images/s.png +0 -0
- data/site/src/documentation/resources/images/s_wbn.png +0 -0
- data/site/src/documentation/resources/images/s_wn.png +0 -0
- data/site/src/documentation/resources/images/sigmoid.png +0 -0
- data/site/src/documentation/resources/images/sub-dir/icon-c.png +0 -0
- data/site/src/documentation/resources/images/t.png +0 -0
- data/site/src/documentation/resources/images/t_wbn.png +0 -0
- data/site/src/documentation/resources/images/t_wn.png +0 -0
- data/site/src/documentation/resources/schema/catalog.xcat +0 -29
- data/site/src/documentation/resources/schema/hello-v10.dtd +0 -51
- data/site/src/documentation/resources/schema/symbols-project-v10.ent +0 -26
- data/site/src/documentation/resources/stylesheets/hello2document.xsl +0 -33
- data/site/src/documentation/sitemap.xmap +0 -66
- data/site/src/documentation/skinconf.xml +0 -418
- data/site/src/documentation/translations/langcode.xml +0 -29
- data/site/src/documentation/translations/languages_de.xml +0 -24
- data/site/src/documentation/translations/languages_en.xml +0 -24
- data/site/src/documentation/translations/languages_es.xml +0 -22
- data/site/src/documentation/translations/languages_fr.xml +0 -24
- data/site/src/documentation/translations/languages_nl.xml +0 -24
- data/site/src/documentation/translations/menu.xml +0 -33
- data/site/src/documentation/translations/menu_af.xml +0 -33
- data/site/src/documentation/translations/menu_de.xml +0 -33
- data/site/src/documentation/translations/menu_es.xml +0 -33
- data/site/src/documentation/translations/menu_fr.xml +0 -33
- data/site/src/documentation/translations/menu_it.xml +0 -33
- data/site/src/documentation/translations/menu_nl.xml +0 -33
- data/site/src/documentation/translations/menu_no.xml +0 -33
- data/site/src/documentation/translations/menu_ru.xml +0 -33
- data/site/src/documentation/translations/menu_sk.xml +0 -33
- data/site/src/documentation/translations/tabs.xml +0 -22
- data/site/src/documentation/translations/tabs_de.xml +0 -22
- data/site/src/documentation/translations/tabs_es.xml +0 -22
- data/site/src/documentation/translations/tabs_fr.xml +0 -22
- data/site/src/documentation/translations/tabs_nl.xml +0 -22
@@ -1,131 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
3
|
-
<document>
|
4
|
-
<header>
|
5
|
-
<title>Machine Learning with ID3 Decision Trees in Ruby</title>
|
6
|
-
</header>
|
7
|
-
<body>
|
8
|
-
<section id="mach-intro">
|
9
|
-
<title>Introduction to ID3 algorithm</title>
|
10
|
-
<p>
|
11
|
-
AI4R implements the ID3 algorithm (Quinlan) as one of its automatic classifiers.
|
12
|
-
Given a set of preclassified examples, it builds a top-down
|
13
|
-
induction of a decision tree, biased by the information gain and
|
14
|
-
entropy measure.
|
15
|
-
</p>
|
16
|
-
<p>
|
17
|
-
The good thing about this automatic learning method is that humans learn as well.
|
18
|
-
Unlike other AI techniques like neural networks, classifiers can
|
19
|
-
generate ruby code with if / else sentences. You
|
20
|
-
can use this to evaluate parameters in real time, copy and paste them into your
|
21
|
-
code, or just read them to learn about your problem domain.
|
22
|
-
</p>
|
23
|
-
</section>
|
24
|
-
|
25
|
-
<section id="mach-HowTo">
|
26
|
-
<title>Marketing target strategy example using ID3 Decision Trees in Ruby</title>
|
27
|
-
<p>Let's suppose that you are writing an application that must identify people as relevant marketing targets or not.
|
28
|
-
The only information that you have is a collection of examples, provided by a marketing survey:</p>
|
29
|
-
<source>
|
30
|
-
<![CDATA[
|
31
|
-
DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target' ]
|
32
|
-
|
33
|
-
DATA_SET = [
|
34
|
-
['New York', '<30', 'M', 'Y'],
|
35
|
-
['Chicago', '<30', 'M', 'Y'],
|
36
|
-
['Chicago', '<30', 'F', 'Y'],
|
37
|
-
['New York', '<30', 'M', 'Y'],
|
38
|
-
['New York', '<30', 'M', 'Y'],
|
39
|
-
['Chicago', '[30-50)', 'M', 'Y'],
|
40
|
-
['New York', '[30-50)', 'F', 'N'],
|
41
|
-
['Chicago', '[30-50)', 'F', 'Y'],
|
42
|
-
['New York', '[30-50)', 'F', 'N'],
|
43
|
-
['Chicago', '[50-80]', 'M', 'N'],
|
44
|
-
['New York', '[50-80]', 'F', 'N'],
|
45
|
-
['New York', '[50-80]', 'M', 'N'],
|
46
|
-
['Chicago', '[50-80]', 'M', 'N'],
|
47
|
-
['New York', '[50-80]', 'F', 'N'],
|
48
|
-
['Chicago', '>80', 'F', 'Y']
|
49
|
-
]
|
50
|
-
]]>
|
51
|
-
</source>
|
52
|
-
<p>You can create an ID3 Decision tree to do the dirty job for you:</p>
|
53
|
-
<source>
|
54
|
-
<![CDATA[
|
55
|
-
id3 = ID3.new(DATA_SET, DATA_LABELS)
|
56
|
-
]]>
|
57
|
-
</source>
|
58
|
-
<p>The Decision tree will automatically create the "rules" to parse new data,
|
59
|
-
and identify new possible marketing targets:</p>
|
60
|
-
<source>
|
61
|
-
<![CDATA[
|
62
|
-
id3.get_rules
|
63
|
-
# => if age_range=='<30' then marketing_target='Y'
|
64
|
-
elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
|
65
|
-
elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
|
66
|
-
elsif age_range=='[50-80]' then marketing_target='N'
|
67
|
-
elsif age_range=='>80' then marketing_target='Y'
|
68
|
-
else raise 'There was not enough information during training to do a proper induction for this data element' end
|
69
|
-
|
70
|
-
id3.eval(['New York', '<30', 'M'])
|
71
|
-
# => 'Y'
|
72
|
-
]]>
|
73
|
-
</source>
|
74
|
-
</section>
|
75
|
-
|
76
|
-
<section id="mach-dataload">
|
77
|
-
<title>Better data loading</title>
|
78
|
-
<p>
|
79
|
-
In real life you will use many more data training examples,
|
80
|
-
with more attributes.
|
81
|
-
Consider moving your data to an external CSV (comma-separated values) file.
|
82
|
-
</p>
|
83
|
-
<source>
|
84
|
-
<![CDATA[
|
85
|
-
data_set = []
|
86
|
-
CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
|
87
|
-
data_set << row
|
88
|
-
end
|
89
|
-
data_labels = data_set.shift
|
90
|
-
|
91
|
-
id3 = ID3.new(data_set, data_labels)
|
92
|
-
]]></source>
|
93
|
-
|
94
|
-
</section>
|
95
|
-
|
96
|
-
<section id="mach-eval">
|
97
|
-
<title>A good tip for data evaluation</title>
|
98
|
-
<p>
|
99
|
-
The ID3 class provides a method to evaluate new data.
|
100
|
-
</p>
|
101
|
-
<source>
|
102
|
-
<![CDATA[
|
103
|
-
id3.eval(['New York', '<30', 'M'])
|
104
|
-
# => 'Y'
|
105
|
-
]]></source>
|
106
|
-
<p>
|
107
|
-
But instead of going through the tree every time, you can take advantage of the
|
108
|
-
fact that the method "get_rules" generates proper ruby code!
|
109
|
-
</p>
|
110
|
-
<source>
|
111
|
-
<![CDATA[
|
112
|
-
id3 = ID3.new(DATA_SET, DATA_LABELS)
|
113
|
-
age_range = '<30'
|
114
|
-
city = 'New York'
|
115
|
-
gender = 'M'
|
116
|
-
marketing_target = nil
|
117
|
-
eval id3.get_rules
|
118
|
-
puts marketing_target
|
119
|
-
# => 'Y'
|
120
|
-
]]></source>
|
121
|
-
</section>
|
122
|
-
|
123
|
-
<section id="mach-more">
|
124
|
-
<title>More about ID3 and decision trees</title>
|
125
|
-
<p>
|
126
|
-
<a href="http://en.wikipedia.org/wiki/Decision_tree">Wikipedia article on Decision trees</a>
|
127
|
-
<a href="http://en.wikipedia.org/wiki/ID3_algorithm">Wikipedia article on ID3 Algorithm</a>
|
128
|
-
</p>
|
129
|
-
</section>
|
130
|
-
</body>
|
131
|
-
</document>
|
@@ -1,270 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<!--
|
3
|
-
Licensed to the Apache Software Foundation (ASF) under one or more
|
4
|
-
contributor license agreements. See the NOTICE file distributed with
|
5
|
-
this work for additional information regarding copyright ownership.
|
6
|
-
The ASF licenses this file to You under the Apache License, Version 2.0
|
7
|
-
(the "License"); you may not use this file except in compliance with
|
8
|
-
the License. You may obtain a copy of the License at
|
9
|
-
|
10
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
11
|
-
|
12
|
-
Unless required by applicable law or agreed to in writing, software
|
13
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
-
See the License for the specific language governing permissions and
|
16
|
-
limitations under the License.
|
17
|
-
|
18
|
-
-->
|
19
|
-
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
|
20
|
-
<document>
|
21
|
-
<header>
|
22
|
-
<title>OCR example using Backpropagation networks in ruby :: ai4r</title>
|
23
|
-
</header>
|
24
|
-
<body>
|
25
|
-
<section id="nn-Introduction">
|
26
|
-
<title>Introduction to Neural Networks</title>
|
27
|
-
<p>
|
28
|
-
The utility of artificial neural network models lies in the fact
|
29
|
-
that they can be used to infer a function from observations. This is
|
30
|
-
particularly useful in applications where the complexity of the data
|
31
|
-
or task makes the design of such a function by hand impractical.
|
32
|
-
Neural Networks are being used in many businesses and applications.
|
33
|
-
Their ability to learn by example makes them attractive in environments
|
34
|
-
where the business rules are either not well defined or are hard to
|
35
|
-
enumerate and define. Many people believe that Neural Networks can
|
36
|
-
only solve toy problems. Give them a try, and decide for yourself if they
|
37
|
-
are good enough to solve your needs.
|
38
|
-
</p>
|
39
|
-
<p>
|
40
|
-
In this module you will find an implementation of neural networks
|
41
|
-
using Backpropagation, a supervised learning technique
|
42
|
-
(described by Paul Werbos in 1974, and further developed by David E.
|
43
|
-
Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
|
44
|
-
</p>
|
45
|
-
</section>
|
46
|
-
<section id="nn-example">
|
47
|
-
<title>Modeling the OCR problem using Neural Networks</title>
|
48
|
-
<p>
|
49
|
-
Let's imagine that we have to implement a program to identify simple patterns
|
50
|
-
(triangles, squares, crosses, etc). The main problem is that this program must
|
51
|
-
be resistant to random noise in the image (pixels with wrong values) and
|
52
|
-
line noise (similar to the unwanted direct current that we usually have in a
|
53
|
-
signal).
|
54
|
-
</p>
|
55
|
-
<p>
|
56
|
-
In order to solve this problem, we can take an example of each pattern
|
57
|
-
to be recognized, and train a neural network to identify similar patterns.
|
58
|
-
In fact, one of the most popular uses of neural networks in business
|
59
|
-
applications is OCR (optical character recognition).
|
60
|
-
</p>
|
61
|
-
<p>
|
62
|
-
In our proposed solution, we create a network with the following
|
63
|
-
architecture: 256 input neurons and 3 output neurons. We feed this network
|
64
|
-
with 16x16 matrices (in fact, we will convert them to vectors of length 256).
|
65
|
-
Each pixel is represented with a number from 0 (white pixel) to 10
|
66
|
-
(black pixel). The output of this network is a vector of dimension 3,
|
67
|
-
where ideally:
|
68
|
-
</p>
|
69
|
-
<ul>
|
70
|
-
<li>(1, 0, 0) for triangles</li>
|
71
|
-
<li>(0, 1, 0) for squares</li>
|
72
|
-
<li>(0, 0, 1) for crosses</li>
|
73
|
-
</ul>
|
74
|
-
<p>We train our backpropagation neural network using the following examples:</p>
|
75
|
-
<table>
|
76
|
-
<caption>Training patterns</caption>
|
77
|
-
<tr>
|
78
|
-
<td><img alt="Triangle training example" src="/images/t.png" /></td>
|
79
|
-
<td><img alt="Square training example" src="/images/s.png" /></td>
|
80
|
-
<td><img alt="Cross training example" src="/images/c.png" /></td>
|
81
|
-
</tr>
|
82
|
-
</table>
|
83
|
-
</section>
|
84
|
-
<section id="nn2-example">
|
85
|
-
<title>Implementing a basic OCR application using ruby and AI4R</title>
|
86
|
-
<p>The code below shows the basic steps to use Backpropagation Neural
|
87
|
-
Networks in AI4R:</p>
|
88
|
-
<source>
|
89
|
-
<![CDATA[
|
90
|
-
# Create the network with:
|
91
|
-
# 4 inputs
|
92
|
-
# 1 hidden layer with 3 neurons
|
93
|
-
# 2 outputs
|
94
|
-
net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
|
95
|
-
|
96
|
-
# Train the network
|
97
|
-
100.times do |i|
|
98
|
-
net.train(example[i], result[i])
|
99
|
-
end
|
100
|
-
|
101
|
-
# Use it: Evaluate data with the trained network
|
102
|
-
net.eval([12, 48, 12, 25]) # => [0.89, 0.04]
|
103
|
-
]]>
|
104
|
-
</source>
|
105
|
-
<p>This is the source code used to elaborate this simple OCR application
|
106
|
-
(You can find it inside the AI4R zip file release):</p>
|
107
|
-
<source>
|
108
|
-
<![CDATA[
|
109
|
-
require "rubygems"
|
110
|
-
require "ai4r"
|
111
|
-
require File.dirname(__FILE__) + '/training_patterns'
|
112
|
-
require File.dirname(__FILE__) + '/patterns_with_noise'
|
113
|
-
require File.dirname(__FILE__) + '/patterns_with_base_noise'
|
114
|
-
|
115
|
-
# Create a network with 256 inputs, and 3 outputs
|
116
|
-
net = Ai4r::NeuralNetwork::Backpropagation.new([256, 3])
|
117
|
-
|
118
|
-
# Load training data
|
119
|
-
tr_input = TRIANGLE.flatten.collect { |input| input.to_f / 10}
|
120
|
-
sq_input = SQUARE.flatten.collect { |input| input.to_f / 10}
|
121
|
-
cr_input = CROSS.flatten.collect { |input| input.to_f / 10}
|
122
|
-
# Train the network
|
123
|
-
puts "Training the network, please wait."
|
124
|
-
100.times do
|
125
|
-
net.train(tr_input, [1,0,0])
|
126
|
-
net.train(sq_input, [0,1,0])
|
127
|
-
net.train(cr_input, [0,0,1])
|
128
|
-
end
|
129
|
-
|
130
|
-
# Load test data with noise
|
131
|
-
tr_with_noise = TRIANGLE_WITH_NOISE.flatten.collect { |input| input.to_f / 10}
|
132
|
-
sq_with_noise = SQUARE_WITH_NOISE.flatten.collect { |input| input.to_f / 10}
|
133
|
-
cr_with_noise = CROSS_WITH_NOISE.flatten.collect { |input| input.to_f / 10}
|
134
|
-
|
135
|
-
tr_with_base_noise = TRIANGLE_WITH_BASE_NOISE.flatten.collect { |input| input.to_f / 10}
|
136
|
-
sq_with_base_noise = SQUARE_WITH_BASE_NOISE.flatten.collect { |input| input.to_f / 10}
|
137
|
-
cr_with_base_noise = CROSS_WITH_BASE_NOISE.flatten.collect { |input| input.to_f / 10}
|
138
|
-
|
139
|
-
# Print the evaluation results
|
140
|
-
|
141
|
-
def result_label(result)
|
142
|
-
if result[0] > result[1] && result[0] > result[2]
|
143
|
-
"TRIANGLE"
|
144
|
-
elsif result[1] > result[2]
|
145
|
-
"SQUARE"
|
146
|
-
else
|
147
|
-
"CROSS"
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
|
-
puts "Training Examples"
|
152
|
-
puts "#{net.eval(tr_input).inspect} => #{result_label(net.eval(tr_input))}"
|
153
|
-
puts "#{net.eval(sq_input).inspect} => #{result_label(net.eval(sq_input))}"
|
154
|
-
puts "#{net.eval(cr_input).inspect} => #{result_label(net.eval(cr_input))}"
|
155
|
-
puts "Examples with noise"
|
156
|
-
puts "#{net.eval(tr_with_noise).inspect} => #{result_label(net.eval(tr_with_noise))}"
|
157
|
-
puts "#{net.eval(sq_with_noise).inspect} => #{result_label(net.eval(sq_with_noise))}"
|
158
|
-
puts "#{net.eval(cr_with_noise).inspect} => #{result_label(net.eval(cr_with_noise))}"
|
159
|
-
puts "Examples with base noise"
|
160
|
-
puts "#{net.eval(tr_with_base_noise).inspect} => #{result_label(net.eval(tr_with_base_noise))}"
|
161
|
-
puts "#{net.eval(sq_with_base_noise).inspect} => #{result_label(net.eval(sq_with_base_noise))}"
|
162
|
-
puts "#{net.eval(cr_with_base_noise).inspect} => #{result_label(net.eval(cr_with_base_noise))}"
|
163
|
-
]]>
|
164
|
-
</source>
|
165
|
-
</section>
|
166
|
-
<section id="nn3-example">
|
167
|
-
<title>Results obtained with the AI4R OCR algorithm</title>
|
168
|
-
<p>
|
169
|
-
The results we got when we evaluate patterns with our trained network are:
|
170
|
-
</p>
|
171
|
-
<ul>
|
172
|
-
<li>Evaluating the training patterns with the trained network:
|
173
|
-
<ol>
|
174
|
-
<li> <img alt="Triangle training example" src="/images/t.png" />
|
175
|
-
[0.98, 0.03, 0.01] => TRIANGLE </li>
|
176
|
-
<li> <img alt="Square training example" src="/images/s.png" />
|
177
|
-
[0.00, 0.96, 0.03] => SQUARE </li>
|
178
|
-
<li> <img alt="Cross training example" src="/images/c.png" />
|
179
|
-
[0.00, 0.00, 0.99] => CROSS </li>
|
180
|
-
</ol>
|
181
|
-
</li>
|
182
|
-
<li>Evaluating the patterns with random noise with the trained network:
|
183
|
-
<ol>
|
184
|
-
<li> <img alt="Triangle pattern with random noise" src="/images/t_wn.png" /> [0.98, 0.01, 0.01] => TRIANGLE </li>
|
185
|
-
<li> <img alt="Square pattern with random noise" src="/images/s_wn.png" /> [0.00, 0.96, 0.02] => SQUARE </li>
|
186
|
-
<li> <img alt="Cross pattern with random noise" src="/images/c_wn.png" /> [0.00, 0.00, 0.98] => CROSS </li>
|
187
|
-
</ol>
|
188
|
-
</li>
|
189
|
-
<li>Evaluating the patterns with line noise with the trained network:
|
190
|
-
<ol>
|
191
|
-
<li> <img alt="Triangle pattern with line noise" src="/images/t_wbn.png" /> [0.62, 0.00, 0.02] => TRIANGLE </li>
|
192
|
-
<li> <img alt="Square pattern with line noise" src="/images/s_wbn.png" /> [0.00, 0.75, 0.01] => SQUARE </li>
|
193
|
-
<li> <img alt="Cross pattern with line noise" src="/images/c_wbn.png" /> [0.00, 0.00, 0.98] => CROSS </li>
|
194
|
-
</ol>
|
195
|
-
</li>
|
196
|
-
</ul>
|
197
|
-
<p>These results are satisfactory. The network could successfully identify
|
198
|
-
the patterns despite the noise introduced to them.</p>
|
199
|
-
</section>
|
200
|
-
|
201
|
-
<section id="nn-custom">
|
202
|
-
<title>Customizing your neural network in ai4r</title>
|
203
|
-
<p>Sometimes, for a given problem, you will have to "play around" with some parameters to
|
204
|
-
get to a solution. These parameters are:</p>
|
205
|
-
|
206
|
-
<p>
|
207
|
-
<strong>Learning Rate</strong>: a real number, usually between 0.05
|
208
|
-
and 0.25.
|
209
|
-
</p>
|
210
|
-
<p>
|
211
|
-
<strong>Momentum</strong>: A momentum will avoid oscillations during
|
212
|
-
learning, converging to a solution in fewer iterations.
|
213
|
-
</p>
|
214
|
-
<p><strong>Propagation function</strong>: By default, f(x) = 1/(1 + e^(-x)).
|
215
|
-
This function is called
|
216
|
-
<a href="http://en.wikipedia.org/wiki/Sigmoid_function" title="wikipedia article on Sigmoid function">
|
217
|
-
Sigmoid function
|
218
|
-
</a>. You can see it like a "smoothed" version of the
|
219
|
-
<a href="http://en.wikipedia.org/wiki/Heaviside_step_function" title="Heaviside step function wikipedia article">
|
220
|
-
Heaviside step function
|
221
|
-
</a>. It will always provide a
|
222
|
-
value between 0 and 1. </p>
|
223
|
-
<p><img src="images/sigmoid.png" alt="Sigmoid function" /></p>
|
224
|
-
<p>
|
225
|
-
Sometimes you will have better results with f(x) = tanh(x), or even with
|
226
|
-
f(x) = x. If you modify the propagation function, you have to supply the
|
227
|
-
derivative function too (in terms of the propagation function result).
|
228
|
-
</p>
|
229
|
-
<p>To customize these parameters in AI4R, you can use the "set_parameters"
|
230
|
-
method:</p>
|
231
|
-
<source><![CDATA[
|
232
|
-
net.set_parameters(
|
233
|
-
:momentum => 0.15,
|
234
|
-
:learning_rate => 0.5,
|
235
|
-
:propagation_function => lambda { |x| Math.tanh(x) },
|
236
|
-
:derivative_propagation_function => lambda { |y| 1.0 - y**2 }
|
237
|
-
)
|
238
|
-
]]></source>
|
239
|
-
<p>You can also use the attribute accessors:</p>
|
240
|
-
<source><![CDATA[
|
241
|
-
net.momentum = 0.15
|
242
|
-
net.learning_rate = 0.5
|
243
|
-
net.propagation_function = lambda { |x| Math.tanh(x) }
|
244
|
-
net.derivative_propagation_function = lambda { |y| 1.0 - y**2 }
|
245
|
-
]]></source>
|
246
|
-
</section>
|
247
|
-
<note>Remember to set the custom parameters BEFORE training the network</note>
|
248
|
-
<section id="nn-more">
|
249
|
-
<title>More about Neural Networks and Backpropagation</title>
|
250
|
-
|
251
|
-
<ul>
|
252
|
-
<li>
|
253
|
-
<a href="http://en.wikipedia.org/wiki/Artificial_neural_network">Wikipedia article on Artificial Neural Networks</a>
|
254
|
-
</li>
|
255
|
-
<li>
|
256
|
-
<a href="http://en.wikipedia.org/wiki/Backpropagation">Wikipedia article on Backpropagation Algorithm</a>
|
257
|
-
</li>
|
258
|
-
<li>
|
259
|
-
<a href="http://www.tek271.com/articles/neuralNet/IntoToNeuralNets.html">Neural Networks - An Introduction by Abdul Habra</a>
|
260
|
-
</li>
|
261
|
-
<li>
|
262
|
-
<a href="http://galaxy.agh.edu.pl/~vlsi/AI/backp_t_en/backprop.html">A graphical explanation of the
|
263
|
-
backpropagation algorithm by Mariusz Bernacki and Przemysław Włodarczyk</a>
|
264
|
-
</li>
|
265
|
-
</ul>
|
266
|
-
|
267
|
-
</section>
|
268
|
-
|
269
|
-
</body>
|
270
|
-
</document>
|
@@ -1,54 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<!--
|
3
|
-
Licensed to the Apache Software Foundation (ASF) under one or more
|
4
|
-
contributor license agreements. See the NOTICE file distributed with
|
5
|
-
this work for additional information regarding copyright ownership.
|
6
|
-
The ASF licenses this file to You under the Apache License, Version 2.0
|
7
|
-
(the "License"); you may not use this file except in compliance with
|
8
|
-
the License. You may obtain a copy of the License at
|
9
|
-
|
10
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
11
|
-
|
12
|
-
Unless required by applicable law or agreed to in writing, software
|
13
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
-
See the License for the specific language governing permissions and
|
16
|
-
limitations under the License.
|
17
|
-
-->
|
18
|
-
<!--
|
19
|
-
Forrest site.xml
|
20
|
-
|
21
|
-
This file contains an outline of the site's information content. It is used to:
|
22
|
-
- Generate the website menus (though these can be overridden - see docs)
|
23
|
-
- Provide semantic, location-independent aliases for internal 'site:' URIs, eg
|
24
|
-
<link href="site:changes"> links to changes.html (or ../changes.html if in
|
25
|
-
subdir).
|
26
|
-
- Provide aliases for external URLs in the external-refs section. Eg, <link
|
27
|
-
href="ext:cocoon"> links to http://cocoon.apache.org/
|
28
|
-
|
29
|
-
See http://forrest.apache.org/docs/linking.html for more info
|
30
|
-
-->
|
31
|
-
<!-- The label attribute of the outer "site" element will only show
|
32
|
-
in the linkmap (linkmap.html).
|
33
|
-
Use elements project-name and group-name in skinconfig to change name of
|
34
|
-
your site or project that is usually shown at the top of page.
|
35
|
-
No matter what you configure for the href attribute, Forrest will
|
36
|
-
always use index.html when you request http://yourHost/
|
37
|
-
See FAQ: "How can I use a start-up-page other than index.html?"
|
38
|
-
-->
|
39
|
-
<site label="ai4r" href="" xmlns="http://apache.org/forrest/linkmap/1.0"
|
40
|
-
tab="">
|
41
|
-
<about label="Home">
|
42
|
-
<index label="Index" href="index.html" description="ai4r - Artificial Intelligence for Ruby"/>
|
43
|
-
<practicalExamples label="Practical Examples">
|
44
|
-
<geneticAlgorithms label="Genetic Algorithms" href="geneticAlgorithms.html" description="Genetic Algorithms in ruby"/>
|
45
|
-
<machineLearning label="Machine Learning" href="machineLearning.html" description="ID3 Decision Trees in ruby"/>
|
46
|
-
<neuralNetworks label="Neural Networks" href="neuralNetworks.html" description="Backpropagation Neural Network in ruby" />
|
47
|
-
</practicalExamples>
|
48
|
-
<download label="Downloads" href="downloads.html" description="ai4r - Download Files"/>
|
49
|
-
<forum label="Forum" href="http://forum.jadeferret.com/viewforum.php?f=3" description="Ai4r Forum"/>
|
50
|
-
<svn label="Source Code repository" href="sourceCode.html" description="ai4r source code repository"/>
|
51
|
-
<wiki label="AI4R Wiki" href="http://wiki.jadeferret.com/Category:AI4R" description="ai4r wiki with How-to guides and more documentation"/>
|
52
|
-
<rdoc label="API doc (Rdoc)" href="http://ai4r.rubyforge.org/rdoc/index.html" description="Ai4 RDoc docs"/>
|
53
|
-
</about>
|
54
|
-
</site>
|