ruby_wordcram 1.0.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.mvn/extensions.xml +8 -0
- data/.mvn/wrapper/maven-wrapper.properties +1 -0
- data/Rakefile +28 -5
- data/docs/_posts/2017-03-07-getting_started.md +3 -2
- data/docs/_posts/2017-03-07-under_the_hood.md +33 -0
- data/lib/WordCram.jar +0 -0
- data/lib/jsoup-1.10.2.jar +0 -0
- data/lib/ruby_wordcram/version.rb +1 -1
- data/lib/ruby_wordcram.rb +1 -2
- data/pom.rb +53 -0
- data/pom.xml +87 -0
- data/ruby_wordcram.gemspec +1 -2
- data/src/cue/lang/Counter.java +141 -0
- data/src/cue/lang/IterableText.java +10 -0
- data/src/cue/lang/NGramIterator.java +151 -0
- data/src/cue/lang/SentenceIterator.java +86 -0
- data/src/cue/lang/WordIterator.java +60 -0
- data/src/cue/lang/stop/StopWords.java +114 -0
- data/src/cue/lang/stop/arabic +351 -0
- data/src/cue/lang/stop/armenian +45 -0
- data/src/cue/lang/stop/catalan +219 -0
- data/src/cue/lang/stop/croatian +2024 -0
- data/src/cue/lang/stop/czech +256 -0
- data/src/cue/lang/stop/danish +94 -0
- data/src/cue/lang/stop/dutch +107 -0
- data/src/cue/lang/stop/english +183 -0
- data/src/cue/lang/stop/esperanto +180 -0
- data/src/cue/lang/stop/farsi +966 -0
- data/src/cue/lang/stop/finnish +235 -0
- data/src/cue/lang/stop/french +543 -0
- data/src/cue/lang/stop/german +231 -0
- data/src/cue/lang/stop/greek +637 -0
- data/src/cue/lang/stop/hebrew +220 -0
- data/src/cue/lang/stop/hindi +97 -0
- data/src/cue/lang/stop/hungarian +202 -0
- data/src/cue/lang/stop/italian +279 -0
- data/src/cue/lang/stop/latin +1 -0
- data/src/cue/lang/stop/norwegian +176 -0
- data/src/cue/lang/stop/polish +138 -0
- data/src/cue/lang/stop/portuguese +204 -0
- data/src/cue/lang/stop/romanian +284 -0
- data/src/cue/lang/stop/russian +652 -0
- data/src/cue/lang/stop/slovak +110 -0
- data/src/cue/lang/stop/slovenian +448 -0
- data/src/cue/lang/stop/spanish +308 -0
- data/src/cue/lang/stop/swedish +114 -0
- data/src/cue/lang/stop/turkish +117 -0
- data/src/cue/lang/unicode/BlockUtil.java +103 -0
- data/src/cue/lang/unicode/Normalizer.java +55 -0
- data/src/cue/lang/unicode/Normalizer6.java +32 -0
- data/src/license.txt +201 -0
- data/src/wordcram/Anglers.java +137 -0
- data/src/wordcram/BBTree.java +133 -0
- data/src/wordcram/BBTreeBuilder.java +61 -0
- data/src/wordcram/Colorers.java +52 -0
- data/src/wordcram/EngineWord.java +73 -0
- data/src/wordcram/Fonters.java +17 -0
- data/src/wordcram/HsbWordColorer.java +28 -0
- data/src/wordcram/ImageShaper.java +91 -0
- data/src/wordcram/Observer.java +9 -0
- data/src/wordcram/PlacerHeatMap.java +134 -0
- data/src/wordcram/Placers.java +74 -0
- data/src/wordcram/PlottingWordNudger.java +38 -0
- data/src/wordcram/PlottingWordPlacer.java +36 -0
- data/src/wordcram/ProcessingWordRenderer.java +42 -0
- data/src/wordcram/RandomWordNudger.java +44 -0
- data/src/wordcram/RenderOptions.java +10 -0
- data/src/wordcram/ShapeBasedPlacer.java +66 -0
- data/src/wordcram/Sizers.java +54 -0
- data/src/wordcram/SketchCallbackObserver.java +70 -0
- data/src/wordcram/SpiralWordNudger.java +31 -0
- data/src/wordcram/SvgWordRenderer.java +110 -0
- data/src/wordcram/SwirlWordPlacer.java +25 -0
- data/src/wordcram/UpperLeftWordPlacer.java +27 -0
- data/src/wordcram/WaveWordPlacer.java +25 -0
- data/src/wordcram/Word.java +357 -0
- data/src/wordcram/WordAngler.java +20 -0
- data/src/wordcram/WordArray.java +18 -0
- data/src/wordcram/WordBag.java +31 -0
- data/src/wordcram/WordColorer.java +25 -0
- data/src/wordcram/WordCounter.java +96 -0
- data/src/wordcram/WordCram.java +920 -0
- data/src/wordcram/WordCramEngine.java +196 -0
- data/src/wordcram/WordFonter.java +24 -0
- data/src/wordcram/WordNudger.java +44 -0
- data/src/wordcram/WordPlacer.java +44 -0
- data/src/wordcram/WordRenderer.java +10 -0
- data/src/wordcram/WordShaper.java +78 -0
- data/src/wordcram/WordSizer.java +46 -0
- data/src/wordcram/WordSkipReason.java +42 -0
- data/src/wordcram/WordSorterAndScaler.java +31 -0
- data/src/wordcram/WordSource.java +5 -0
- data/src/wordcram/text/Html.java +15 -0
- data/src/wordcram/text/Html2Text.java +17 -0
- data/src/wordcram/text/Text.java +15 -0
- data/src/wordcram/text/TextFile.java +23 -0
- data/src/wordcram/text/TextSource.java +5 -0
- data/src/wordcram/text/WebPage.java +23 -0
- metadata +94 -5
- data/lib/cue.language.jar +0 -0
- data/lib/jsoup-1.7.2.jar +0 -0
- data/vendors/Rakefile +0 -51
@@ -0,0 +1,42 @@
|
|
1
|
+
package wordcram;
|
2
|
+
|
3
|
+
public enum WordSkipReason {
|
4
|
+
|
5
|
+
/**
|
6
|
+
* Skip Reason: the Word was skipped because the WordCram had already drawn
|
7
|
+
* the {@link WordCram#maxNumberOfWordsToDraw(int)}.
|
8
|
+
* It's really about the Word's rank, its position in the list once the
|
9
|
+
* words are sorted by weight: if its rank is greater than the value passed
|
10
|
+
* to maxNumberOfWordsToDraw(), then it'll be skipped, and this will be the
|
11
|
+
* reason code.
|
12
|
+
*/
|
13
|
+
WAS_OVER_MAX_NUMBER_OF_WORDS("we already reached the maxNumberOfWordsToDraw threshold"),
|
14
|
+
|
15
|
+
/**
|
16
|
+
* Skip Reason: the Word's shape was smaller than
|
17
|
+
* {@link WordCram#minShapeSize(int)}.
|
18
|
+
* WordCram will only render words so small, for performance reasons. You
|
19
|
+
* can set the minimum Word shape size via
|
20
|
+
* {@link WordCram#minShapeSize(int)}.
|
21
|
+
*/
|
22
|
+
SHAPE_WAS_TOO_SMALL("it was below the minShapeSize threshold"),
|
23
|
+
|
24
|
+
/**
|
25
|
+
* Skip Reason: WordCram tried placing the Word, but it couldn't find a
|
26
|
+
* clear spot. The {@link WordNudger} nudged it around a bunch (according
|
27
|
+
* to {@link WordCram#maxAttemptsToPlaceWord(int)}, if it was set), but
|
28
|
+
* there was just no room.
|
29
|
+
*/
|
30
|
+
NO_SPACE("there wasn't enough space near where you wanted it placed");
|
31
|
+
|
32
|
+
String reason;
|
33
|
+
|
34
|
+
WordSkipReason(String reason) {
|
35
|
+
this.reason = reason;
|
36
|
+
}
|
37
|
+
|
38
|
+
@Override
|
39
|
+
public String toString() {
|
40
|
+
return reason;
|
41
|
+
}
|
42
|
+
}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
package wordcram;
|
2
|
+
|
3
|
+
import java.util.Arrays;
|
4
|
+
|
5
|
+
class WordSorterAndScaler {
|
6
|
+
|
7
|
+
public Word[] sortAndScale(Word[] rawWords) {
|
8
|
+
if (rawWords.length == 0) {
|
9
|
+
return rawWords;
|
10
|
+
}
|
11
|
+
|
12
|
+
Word[] words = copy(rawWords);
|
13
|
+
Arrays.sort(words);
|
14
|
+
float maxWeight = words[0].weight;
|
15
|
+
|
16
|
+
for (Word word : words) {
|
17
|
+
word.weight = word.weight / maxWeight;
|
18
|
+
}
|
19
|
+
|
20
|
+
return words;
|
21
|
+
}
|
22
|
+
|
23
|
+
private Word[] copy(Word[] rawWords) {
|
24
|
+
|
25
|
+
// was Arrays.copyOf(rawWords, rawWords.length); - removed for Java 1.5 compatibility.
|
26
|
+
|
27
|
+
Word[] copy = new Word[rawWords.length];
|
28
|
+
System.arraycopy(rawWords, 0, copy, 0, copy.length);
|
29
|
+
return copy;
|
30
|
+
}
|
31
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
package wordcram.text;
|
2
|
+
|
3
|
+
public class Html implements TextSource {
|
4
|
+
|
5
|
+
private final String src;
|
6
|
+
|
7
|
+
public Html(String htmlSrc) {
|
8
|
+
src = htmlSrc;
|
9
|
+
}
|
10
|
+
|
11
|
+
@Override
|
12
|
+
public String getText() {
|
13
|
+
return new Html2Text().text(src, null);
|
14
|
+
}
|
15
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
package wordcram.text;
|
2
|
+
|
3
|
+
import org.jsoup.Jsoup;
|
4
|
+
import org.jsoup.nodes.Document;
|
5
|
+
|
6
|
+
class Html2Text {
|
7
|
+
public String text(String html, String cssSelector) {
|
8
|
+
Document doc = Jsoup.parse(html);
|
9
|
+
|
10
|
+
if (cssSelector == null) {
|
11
|
+
return doc.text();
|
12
|
+
}
|
13
|
+
else {
|
14
|
+
return doc.select(cssSelector).text();
|
15
|
+
}
|
16
|
+
}
|
17
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package wordcram.text;
|
2
|
+
|
3
|
+
import processing.core.PApplet;
|
4
|
+
|
5
|
+
public class TextFile implements TextSource {
|
6
|
+
|
7
|
+
private final String path;
|
8
|
+
|
9
|
+
// TODO if we move all .text.* classes into WordCram, we can kill this, and
|
10
|
+
// use pkg-local methods for setting the parent...
|
11
|
+
private final PApplet parent;
|
12
|
+
|
13
|
+
public TextFile(String path, PApplet parent) {
|
14
|
+
this.path = path;
|
15
|
+
this.parent = parent;
|
16
|
+
}
|
17
|
+
|
18
|
+
@Override
|
19
|
+
public String getText() {
|
20
|
+
return PApplet.join(parent.loadStrings(path), ' ');
|
21
|
+
}
|
22
|
+
|
23
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package wordcram.text;
|
2
|
+
|
3
|
+
import processing.core.PApplet;
|
4
|
+
|
5
|
+
public class WebPage implements TextSource {
|
6
|
+
|
7
|
+
private final String url;
|
8
|
+
private final String cssSelector;
|
9
|
+
private final PApplet parent;
|
10
|
+
|
11
|
+
public WebPage(String url, String cssSelector, PApplet parent) {
|
12
|
+
this.url = url;
|
13
|
+
this.cssSelector = cssSelector;
|
14
|
+
this.parent = parent;
|
15
|
+
}
|
16
|
+
|
17
|
+
@Override
|
18
|
+
public String getText() {
|
19
|
+
String html = PApplet.join(parent.loadStrings(url), ' ');
|
20
|
+
return new Html2Text().text(html, cssSelector);
|
21
|
+
}
|
22
|
+
|
23
|
+
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_wordcram
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dan Bernier
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2017-03-
|
13
|
+
date: 2017-03-11 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rake
|
@@ -42,6 +42,8 @@ extra_rdoc_files:
|
|
42
42
|
- LICENSE
|
43
43
|
files:
|
44
44
|
- ".gitignore"
|
45
|
+
- ".mvn/extensions.xml"
|
46
|
+
- ".mvn/wrapper/maven-wrapper.properties"
|
45
47
|
- CHANGELOG.md
|
46
48
|
- LICENSE
|
47
49
|
- README.md
|
@@ -59,6 +61,7 @@ files:
|
|
59
61
|
- docs/_layouts/page.html
|
60
62
|
- docs/_layouts/post.html
|
61
63
|
- docs/_posts/2017-03-07-getting_started.md
|
64
|
+
- docs/_posts/2017-03-07-under_the_hood.md
|
62
65
|
- docs/_sass/_base.scss
|
63
66
|
- docs/_sass/_layout.scss
|
64
67
|
- docs/_sass/_syntax-highlighting.scss
|
@@ -70,12 +73,98 @@ files:
|
|
70
73
|
- example/data/MINYN___.TTF
|
71
74
|
- example/test.rb
|
72
75
|
- lib/WordCram.jar
|
73
|
-
- lib/cue.language.jar
|
74
|
-
- lib/jsoup-1.7.2.jar
|
76
|
+
- lib/jsoup-1.10.2.jar
|
75
77
|
- lib/ruby_wordcram.rb
|
76
78
|
- lib/ruby_wordcram/version.rb
|
79
|
+
- pom.rb
|
80
|
+
- pom.xml
|
77
81
|
- ruby_wordcram.gemspec
|
78
|
-
- vendors/Rakefile
|
82
|
+
- src/cue/lang/Counter.java
|
83
|
+
- src/cue/lang/IterableText.java
|
84
|
+
- src/cue/lang/NGramIterator.java
|
85
|
+
- src/cue/lang/SentenceIterator.java
|
86
|
+
- src/cue/lang/WordIterator.java
|
87
|
+
- src/cue/lang/stop/StopWords.java
|
88
|
+
- src/cue/lang/stop/arabic
|
89
|
+
- src/cue/lang/stop/armenian
|
90
|
+
- src/cue/lang/stop/catalan
|
91
|
+
- src/cue/lang/stop/croatian
|
92
|
+
- src/cue/lang/stop/czech
|
93
|
+
- src/cue/lang/stop/danish
|
94
|
+
- src/cue/lang/stop/dutch
|
95
|
+
- src/cue/lang/stop/english
|
96
|
+
- src/cue/lang/stop/esperanto
|
97
|
+
- src/cue/lang/stop/farsi
|
98
|
+
- src/cue/lang/stop/finnish
|
99
|
+
- src/cue/lang/stop/french
|
100
|
+
- src/cue/lang/stop/german
|
101
|
+
- src/cue/lang/stop/greek
|
102
|
+
- src/cue/lang/stop/hebrew
|
103
|
+
- src/cue/lang/stop/hindi
|
104
|
+
- src/cue/lang/stop/hungarian
|
105
|
+
- src/cue/lang/stop/italian
|
106
|
+
- src/cue/lang/stop/latin
|
107
|
+
- src/cue/lang/stop/norwegian
|
108
|
+
- src/cue/lang/stop/polish
|
109
|
+
- src/cue/lang/stop/portuguese
|
110
|
+
- src/cue/lang/stop/romanian
|
111
|
+
- src/cue/lang/stop/russian
|
112
|
+
- src/cue/lang/stop/slovak
|
113
|
+
- src/cue/lang/stop/slovenian
|
114
|
+
- src/cue/lang/stop/spanish
|
115
|
+
- src/cue/lang/stop/swedish
|
116
|
+
- src/cue/lang/stop/turkish
|
117
|
+
- src/cue/lang/unicode/BlockUtil.java
|
118
|
+
- src/cue/lang/unicode/Normalizer.java
|
119
|
+
- src/cue/lang/unicode/Normalizer6.java
|
120
|
+
- src/license.txt
|
121
|
+
- src/wordcram/Anglers.java
|
122
|
+
- src/wordcram/BBTree.java
|
123
|
+
- src/wordcram/BBTreeBuilder.java
|
124
|
+
- src/wordcram/Colorers.java
|
125
|
+
- src/wordcram/EngineWord.java
|
126
|
+
- src/wordcram/Fonters.java
|
127
|
+
- src/wordcram/HsbWordColorer.java
|
128
|
+
- src/wordcram/ImageShaper.java
|
129
|
+
- src/wordcram/Observer.java
|
130
|
+
- src/wordcram/PlacerHeatMap.java
|
131
|
+
- src/wordcram/Placers.java
|
132
|
+
- src/wordcram/PlottingWordNudger.java
|
133
|
+
- src/wordcram/PlottingWordPlacer.java
|
134
|
+
- src/wordcram/ProcessingWordRenderer.java
|
135
|
+
- src/wordcram/RandomWordNudger.java
|
136
|
+
- src/wordcram/RenderOptions.java
|
137
|
+
- src/wordcram/ShapeBasedPlacer.java
|
138
|
+
- src/wordcram/Sizers.java
|
139
|
+
- src/wordcram/SketchCallbackObserver.java
|
140
|
+
- src/wordcram/SpiralWordNudger.java
|
141
|
+
- src/wordcram/SvgWordRenderer.java
|
142
|
+
- src/wordcram/SwirlWordPlacer.java
|
143
|
+
- src/wordcram/UpperLeftWordPlacer.java
|
144
|
+
- src/wordcram/WaveWordPlacer.java
|
145
|
+
- src/wordcram/Word.java
|
146
|
+
- src/wordcram/WordAngler.java
|
147
|
+
- src/wordcram/WordArray.java
|
148
|
+
- src/wordcram/WordBag.java
|
149
|
+
- src/wordcram/WordColorer.java
|
150
|
+
- src/wordcram/WordCounter.java
|
151
|
+
- src/wordcram/WordCram.java
|
152
|
+
- src/wordcram/WordCramEngine.java
|
153
|
+
- src/wordcram/WordFonter.java
|
154
|
+
- src/wordcram/WordNudger.java
|
155
|
+
- src/wordcram/WordPlacer.java
|
156
|
+
- src/wordcram/WordRenderer.java
|
157
|
+
- src/wordcram/WordShaper.java
|
158
|
+
- src/wordcram/WordSizer.java
|
159
|
+
- src/wordcram/WordSkipReason.java
|
160
|
+
- src/wordcram/WordSorterAndScaler.java
|
161
|
+
- src/wordcram/WordSource.java
|
162
|
+
- src/wordcram/text/Html.java
|
163
|
+
- src/wordcram/text/Html2Text.java
|
164
|
+
- src/wordcram/text/Text.java
|
165
|
+
- src/wordcram/text/TextFile.java
|
166
|
+
- src/wordcram/text/TextSource.java
|
167
|
+
- src/wordcram/text/WebPage.java
|
79
168
|
homepage: http://ruby-processing.github.io/WordCram/
|
80
169
|
licenses:
|
81
170
|
- Apache-2.0
|
data/lib/cue.language.jar
DELETED
Binary file
|
data/lib/jsoup-1.7.2.jar
DELETED
Binary file
|
data/vendors/Rakefile
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
require 'rake/clean'
|
2
|
-
|
3
|
-
WARNING = <<-EOS.freeze
|
4
|
-
WARNING: you may not have wget installed
|
5
|
-
|
6
|
-
EOS
|
7
|
-
|
8
|
-
WORDCRAM_VERSION = '1.0.0'
|
9
|
-
CLOBBER.include("wordcram.#{WORDCRAM_VERSION}.zip")
|
10
|
-
|
11
|
-
desc "download, and copy to wordcram/lib"
|
12
|
-
task :default => [:download, :unpack_library, :copy_jars]
|
13
|
-
|
14
|
-
desc "download wordcram upstream sources"
|
15
|
-
task :download => ["wordcram.#{WORDCRAM_VERSION}.zip"]
|
16
|
-
|
17
|
-
file "wordcram.#{WORDCRAM_VERSION}.zip" do
|
18
|
-
begin
|
19
|
-
sh "wget http://wordcram.org/wordcram.#{WORDCRAM_VERSION}.zip"
|
20
|
-
rescue
|
21
|
-
warn(WARNING)
|
22
|
-
end
|
23
|
-
check_sha256("wordcram.#{WORDCRAM_VERSION}.zip", "d6b936db3628806099eba3f309fad81dae7f3db5e2a2846742c959b03eb0d25f")
|
24
|
-
end
|
25
|
-
|
26
|
-
desc "unpack wordcram library"
|
27
|
-
task :unpack_library do
|
28
|
-
sh "unzip wordcram.#{WORDCRAM_VERSION}.zip"
|
29
|
-
end
|
30
|
-
|
31
|
-
directory "../lib"
|
32
|
-
|
33
|
-
desc "copy jars"
|
34
|
-
task :copy_jars => ["../lib"] do
|
35
|
-
sh "cp -v WordCram/library/WordCram.jar ../lib/WordCram.jar"
|
36
|
-
sh "cp -v WordCram/library/cue.language.jar ../lib/cue.language.jar"
|
37
|
-
sh "cp -v WordCram/library/jsoup-1.7.2.jar ../lib/jsoup-1.7.2.jar"
|
38
|
-
end
|
39
|
-
|
40
|
-
def check_sha256(filename, expected_hash)
|
41
|
-
require "digest"
|
42
|
-
sha256 = Digest::SHA256.new
|
43
|
-
File.open(filename, "r") do |f|
|
44
|
-
while buf = f.read(4096)
|
45
|
-
sha256.update(buf)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
if sha256.hexdigest != expected_hash
|
49
|
-
raise "bad sha256 checksum for #{filename} (expected #{expected_hash} got #{sha256.hexdigest})"
|
50
|
-
end
|
51
|
-
end
|