ruby_wordcram 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.mvn/extensions.xml +8 -0
- data/.mvn/wrapper/maven-wrapper.properties +1 -0
- data/Rakefile +28 -5
- data/docs/_posts/2017-03-07-getting_started.md +3 -2
- data/docs/_posts/2017-03-07-under_the_hood.md +33 -0
- data/lib/WordCram.jar +0 -0
- data/lib/jsoup-1.10.2.jar +0 -0
- data/lib/ruby_wordcram/version.rb +1 -1
- data/lib/ruby_wordcram.rb +1 -2
- data/pom.rb +53 -0
- data/pom.xml +87 -0
- data/ruby_wordcram.gemspec +1 -2
- data/src/cue/lang/Counter.java +141 -0
- data/src/cue/lang/IterableText.java +10 -0
- data/src/cue/lang/NGramIterator.java +151 -0
- data/src/cue/lang/SentenceIterator.java +86 -0
- data/src/cue/lang/WordIterator.java +60 -0
- data/src/cue/lang/stop/StopWords.java +114 -0
- data/src/cue/lang/stop/arabic +351 -0
- data/src/cue/lang/stop/armenian +45 -0
- data/src/cue/lang/stop/catalan +219 -0
- data/src/cue/lang/stop/croatian +2024 -0
- data/src/cue/lang/stop/czech +256 -0
- data/src/cue/lang/stop/danish +94 -0
- data/src/cue/lang/stop/dutch +107 -0
- data/src/cue/lang/stop/english +183 -0
- data/src/cue/lang/stop/esperanto +180 -0
- data/src/cue/lang/stop/farsi +966 -0
- data/src/cue/lang/stop/finnish +235 -0
- data/src/cue/lang/stop/french +543 -0
- data/src/cue/lang/stop/german +231 -0
- data/src/cue/lang/stop/greek +637 -0
- data/src/cue/lang/stop/hebrew +220 -0
- data/src/cue/lang/stop/hindi +97 -0
- data/src/cue/lang/stop/hungarian +202 -0
- data/src/cue/lang/stop/italian +279 -0
- data/src/cue/lang/stop/latin +1 -0
- data/src/cue/lang/stop/norwegian +176 -0
- data/src/cue/lang/stop/polish +138 -0
- data/src/cue/lang/stop/portuguese +204 -0
- data/src/cue/lang/stop/romanian +284 -0
- data/src/cue/lang/stop/russian +652 -0
- data/src/cue/lang/stop/slovak +110 -0
- data/src/cue/lang/stop/slovenian +448 -0
- data/src/cue/lang/stop/spanish +308 -0
- data/src/cue/lang/stop/swedish +114 -0
- data/src/cue/lang/stop/turkish +117 -0
- data/src/cue/lang/unicode/BlockUtil.java +103 -0
- data/src/cue/lang/unicode/Normalizer.java +55 -0
- data/src/cue/lang/unicode/Normalizer6.java +32 -0
- data/src/license.txt +201 -0
- data/src/wordcram/Anglers.java +137 -0
- data/src/wordcram/BBTree.java +133 -0
- data/src/wordcram/BBTreeBuilder.java +61 -0
- data/src/wordcram/Colorers.java +52 -0
- data/src/wordcram/EngineWord.java +73 -0
- data/src/wordcram/Fonters.java +17 -0
- data/src/wordcram/HsbWordColorer.java +28 -0
- data/src/wordcram/ImageShaper.java +91 -0
- data/src/wordcram/Observer.java +9 -0
- data/src/wordcram/PlacerHeatMap.java +134 -0
- data/src/wordcram/Placers.java +74 -0
- data/src/wordcram/PlottingWordNudger.java +38 -0
- data/src/wordcram/PlottingWordPlacer.java +36 -0
- data/src/wordcram/ProcessingWordRenderer.java +42 -0
- data/src/wordcram/RandomWordNudger.java +44 -0
- data/src/wordcram/RenderOptions.java +10 -0
- data/src/wordcram/ShapeBasedPlacer.java +66 -0
- data/src/wordcram/Sizers.java +54 -0
- data/src/wordcram/SketchCallbackObserver.java +70 -0
- data/src/wordcram/SpiralWordNudger.java +31 -0
- data/src/wordcram/SvgWordRenderer.java +110 -0
- data/src/wordcram/SwirlWordPlacer.java +25 -0
- data/src/wordcram/UpperLeftWordPlacer.java +27 -0
- data/src/wordcram/WaveWordPlacer.java +25 -0
- data/src/wordcram/Word.java +357 -0
- data/src/wordcram/WordAngler.java +20 -0
- data/src/wordcram/WordArray.java +18 -0
- data/src/wordcram/WordBag.java +31 -0
- data/src/wordcram/WordColorer.java +25 -0
- data/src/wordcram/WordCounter.java +96 -0
- data/src/wordcram/WordCram.java +920 -0
- data/src/wordcram/WordCramEngine.java +196 -0
- data/src/wordcram/WordFonter.java +24 -0
- data/src/wordcram/WordNudger.java +44 -0
- data/src/wordcram/WordPlacer.java +44 -0
- data/src/wordcram/WordRenderer.java +10 -0
- data/src/wordcram/WordShaper.java +78 -0
- data/src/wordcram/WordSizer.java +46 -0
- data/src/wordcram/WordSkipReason.java +42 -0
- data/src/wordcram/WordSorterAndScaler.java +31 -0
- data/src/wordcram/WordSource.java +5 -0
- data/src/wordcram/text/Html.java +15 -0
- data/src/wordcram/text/Html2Text.java +17 -0
- data/src/wordcram/text/Text.java +15 -0
- data/src/wordcram/text/TextFile.java +23 -0
- data/src/wordcram/text/TextSource.java +5 -0
- data/src/wordcram/text/WebPage.java +23 -0
- metadata +94 -5
- data/lib/cue.language.jar +0 -0
- data/lib/jsoup-1.7.2.jar +0 -0
- data/vendors/Rakefile +0 -51
@@ -0,0 +1,42 @@
|
|
1
|
+
package wordcram;
|
2
|
+
|
3
|
+
public enum WordSkipReason {
|
4
|
+
|
5
|
+
/**
|
6
|
+
* Skip Reason: the Word was skipped because the WordCram had already drawn
|
7
|
+
* the {@link WordCram#maxNumberOfWordsToDraw(int)}.
|
8
|
+
* It's really about the Word's rank, its position in the list once the
|
9
|
+
* words are sorted by weight: if its rank is greater than the value passed
|
10
|
+
* to maxNumberOfWordsToDraw(), then it'll be skipped, and this will be the
|
11
|
+
* reason code.
|
12
|
+
*/
|
13
|
+
WAS_OVER_MAX_NUMBER_OF_WORDS("we already reached the maxNumberOfWordsToDraw threshold"),
|
14
|
+
|
15
|
+
/**
|
16
|
+
* Skip Reason: the Word's shape was smaller than
|
17
|
+
* {@link WordCram#minShapeSize(int)}.
|
18
|
+
* WordCram will only render words so small, for performance reasons. You
|
19
|
+
* can set the minimum Word shape size via
|
20
|
+
* {@link WordCram#minShapeSize(int)}.
|
21
|
+
*/
|
22
|
+
SHAPE_WAS_TOO_SMALL("it was below the minShapeSize threshold"),
|
23
|
+
|
24
|
+
/**
|
25
|
+
* Skip Reason: WordCram tried placing the Word, but it couldn't find a
|
26
|
+
* clear spot. The {@link WordNudger} nudged it around a bunch (according
|
27
|
+
* to {@link WordCram#maxAttemptsToPlaceWord(int)}, if it was set), but
|
28
|
+
* there was just no room.
|
29
|
+
*/
|
30
|
+
NO_SPACE("there wasn't enough space near where you wanted it placed");
|
31
|
+
|
32
|
+
String reason;
|
33
|
+
|
34
|
+
WordSkipReason(String reason) {
|
35
|
+
this.reason = reason;
|
36
|
+
}
|
37
|
+
|
38
|
+
@Override
|
39
|
+
public String toString() {
|
40
|
+
return reason;
|
41
|
+
}
|
42
|
+
}
|
@@ -0,0 +1,31 @@
|
|
1
|
+
package wordcram;
|
2
|
+
|
3
|
+
import java.util.Arrays;
|
4
|
+
|
5
|
+
class WordSorterAndScaler {
|
6
|
+
|
7
|
+
public Word[] sortAndScale(Word[] rawWords) {
|
8
|
+
if (rawWords.length == 0) {
|
9
|
+
return rawWords;
|
10
|
+
}
|
11
|
+
|
12
|
+
Word[] words = copy(rawWords);
|
13
|
+
Arrays.sort(words);
|
14
|
+
float maxWeight = words[0].weight;
|
15
|
+
|
16
|
+
for (Word word : words) {
|
17
|
+
word.weight = word.weight / maxWeight;
|
18
|
+
}
|
19
|
+
|
20
|
+
return words;
|
21
|
+
}
|
22
|
+
|
23
|
+
private Word[] copy(Word[] rawWords) {
|
24
|
+
|
25
|
+
// was Arrays.copyOf(rawWords, rawWords.length); - removed for Java 1.5 compatibility.
|
26
|
+
|
27
|
+
Word[] copy = new Word[rawWords.length];
|
28
|
+
System.arraycopy(rawWords, 0, copy, 0, copy.length);
|
29
|
+
return copy;
|
30
|
+
}
|
31
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
package wordcram.text;
|
2
|
+
|
3
|
+
public class Html implements TextSource {
|
4
|
+
|
5
|
+
private final String src;
|
6
|
+
|
7
|
+
public Html(String htmlSrc) {
|
8
|
+
src = htmlSrc;
|
9
|
+
}
|
10
|
+
|
11
|
+
@Override
|
12
|
+
public String getText() {
|
13
|
+
return new Html2Text().text(src, null);
|
14
|
+
}
|
15
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
package wordcram.text;
|
2
|
+
|
3
|
+
import org.jsoup.Jsoup;
|
4
|
+
import org.jsoup.nodes.Document;
|
5
|
+
|
6
|
+
class Html2Text {
|
7
|
+
public String text(String html, String cssSelector) {
|
8
|
+
Document doc = Jsoup.parse(html);
|
9
|
+
|
10
|
+
if (cssSelector == null) {
|
11
|
+
return doc.text();
|
12
|
+
}
|
13
|
+
else {
|
14
|
+
return doc.select(cssSelector).text();
|
15
|
+
}
|
16
|
+
}
|
17
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package wordcram.text;
|
2
|
+
|
3
|
+
import processing.core.PApplet;
|
4
|
+
|
5
|
+
public class TextFile implements TextSource {
|
6
|
+
|
7
|
+
private final String path;
|
8
|
+
|
9
|
+
// TODO if we move all .text.* classes into WordCram, we can kill this, and
|
10
|
+
// use pkg-local methods for setting the parent...
|
11
|
+
private final PApplet parent;
|
12
|
+
|
13
|
+
public TextFile(String path, PApplet parent) {
|
14
|
+
this.path = path;
|
15
|
+
this.parent = parent;
|
16
|
+
}
|
17
|
+
|
18
|
+
@Override
|
19
|
+
public String getText() {
|
20
|
+
return PApplet.join(parent.loadStrings(path), ' ');
|
21
|
+
}
|
22
|
+
|
23
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package wordcram.text;
|
2
|
+
|
3
|
+
import processing.core.PApplet;
|
4
|
+
|
5
|
+
public class WebPage implements TextSource {
|
6
|
+
|
7
|
+
private final String url;
|
8
|
+
private final String cssSelector;
|
9
|
+
private final PApplet parent;
|
10
|
+
|
11
|
+
public WebPage(String url, String cssSelector, PApplet parent) {
|
12
|
+
this.url = url;
|
13
|
+
this.cssSelector = cssSelector;
|
14
|
+
this.parent = parent;
|
15
|
+
}
|
16
|
+
|
17
|
+
@Override
|
18
|
+
public String getText() {
|
19
|
+
String html = PApplet.join(parent.loadStrings(url), ' ');
|
20
|
+
return new Html2Text().text(html, cssSelector);
|
21
|
+
}
|
22
|
+
|
23
|
+
}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_wordcram
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dan Bernier
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2017-03-
|
13
|
+
date: 2017-03-11 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rake
|
@@ -42,6 +42,8 @@ extra_rdoc_files:
|
|
42
42
|
- LICENSE
|
43
43
|
files:
|
44
44
|
- ".gitignore"
|
45
|
+
- ".mvn/extensions.xml"
|
46
|
+
- ".mvn/wrapper/maven-wrapper.properties"
|
45
47
|
- CHANGELOG.md
|
46
48
|
- LICENSE
|
47
49
|
- README.md
|
@@ -59,6 +61,7 @@ files:
|
|
59
61
|
- docs/_layouts/page.html
|
60
62
|
- docs/_layouts/post.html
|
61
63
|
- docs/_posts/2017-03-07-getting_started.md
|
64
|
+
- docs/_posts/2017-03-07-under_the_hood.md
|
62
65
|
- docs/_sass/_base.scss
|
63
66
|
- docs/_sass/_layout.scss
|
64
67
|
- docs/_sass/_syntax-highlighting.scss
|
@@ -70,12 +73,98 @@ files:
|
|
70
73
|
- example/data/MINYN___.TTF
|
71
74
|
- example/test.rb
|
72
75
|
- lib/WordCram.jar
|
73
|
-
- lib/cue.language.jar
|
74
|
-
- lib/jsoup-1.7.2.jar
|
76
|
+
- lib/jsoup-1.10.2.jar
|
75
77
|
- lib/ruby_wordcram.rb
|
76
78
|
- lib/ruby_wordcram/version.rb
|
79
|
+
- pom.rb
|
80
|
+
- pom.xml
|
77
81
|
- ruby_wordcram.gemspec
|
78
|
-
- vendors/Rakefile
|
82
|
+
- src/cue/lang/Counter.java
|
83
|
+
- src/cue/lang/IterableText.java
|
84
|
+
- src/cue/lang/NGramIterator.java
|
85
|
+
- src/cue/lang/SentenceIterator.java
|
86
|
+
- src/cue/lang/WordIterator.java
|
87
|
+
- src/cue/lang/stop/StopWords.java
|
88
|
+
- src/cue/lang/stop/arabic
|
89
|
+
- src/cue/lang/stop/armenian
|
90
|
+
- src/cue/lang/stop/catalan
|
91
|
+
- src/cue/lang/stop/croatian
|
92
|
+
- src/cue/lang/stop/czech
|
93
|
+
- src/cue/lang/stop/danish
|
94
|
+
- src/cue/lang/stop/dutch
|
95
|
+
- src/cue/lang/stop/english
|
96
|
+
- src/cue/lang/stop/esperanto
|
97
|
+
- src/cue/lang/stop/farsi
|
98
|
+
- src/cue/lang/stop/finnish
|
99
|
+
- src/cue/lang/stop/french
|
100
|
+
- src/cue/lang/stop/german
|
101
|
+
- src/cue/lang/stop/greek
|
102
|
+
- src/cue/lang/stop/hebrew
|
103
|
+
- src/cue/lang/stop/hindi
|
104
|
+
- src/cue/lang/stop/hungarian
|
105
|
+
- src/cue/lang/stop/italian
|
106
|
+
- src/cue/lang/stop/latin
|
107
|
+
- src/cue/lang/stop/norwegian
|
108
|
+
- src/cue/lang/stop/polish
|
109
|
+
- src/cue/lang/stop/portuguese
|
110
|
+
- src/cue/lang/stop/romanian
|
111
|
+
- src/cue/lang/stop/russian
|
112
|
+
- src/cue/lang/stop/slovak
|
113
|
+
- src/cue/lang/stop/slovenian
|
114
|
+
- src/cue/lang/stop/spanish
|
115
|
+
- src/cue/lang/stop/swedish
|
116
|
+
- src/cue/lang/stop/turkish
|
117
|
+
- src/cue/lang/unicode/BlockUtil.java
|
118
|
+
- src/cue/lang/unicode/Normalizer.java
|
119
|
+
- src/cue/lang/unicode/Normalizer6.java
|
120
|
+
- src/license.txt
|
121
|
+
- src/wordcram/Anglers.java
|
122
|
+
- src/wordcram/BBTree.java
|
123
|
+
- src/wordcram/BBTreeBuilder.java
|
124
|
+
- src/wordcram/Colorers.java
|
125
|
+
- src/wordcram/EngineWord.java
|
126
|
+
- src/wordcram/Fonters.java
|
127
|
+
- src/wordcram/HsbWordColorer.java
|
128
|
+
- src/wordcram/ImageShaper.java
|
129
|
+
- src/wordcram/Observer.java
|
130
|
+
- src/wordcram/PlacerHeatMap.java
|
131
|
+
- src/wordcram/Placers.java
|
132
|
+
- src/wordcram/PlottingWordNudger.java
|
133
|
+
- src/wordcram/PlottingWordPlacer.java
|
134
|
+
- src/wordcram/ProcessingWordRenderer.java
|
135
|
+
- src/wordcram/RandomWordNudger.java
|
136
|
+
- src/wordcram/RenderOptions.java
|
137
|
+
- src/wordcram/ShapeBasedPlacer.java
|
138
|
+
- src/wordcram/Sizers.java
|
139
|
+
- src/wordcram/SketchCallbackObserver.java
|
140
|
+
- src/wordcram/SpiralWordNudger.java
|
141
|
+
- src/wordcram/SvgWordRenderer.java
|
142
|
+
- src/wordcram/SwirlWordPlacer.java
|
143
|
+
- src/wordcram/UpperLeftWordPlacer.java
|
144
|
+
- src/wordcram/WaveWordPlacer.java
|
145
|
+
- src/wordcram/Word.java
|
146
|
+
- src/wordcram/WordAngler.java
|
147
|
+
- src/wordcram/WordArray.java
|
148
|
+
- src/wordcram/WordBag.java
|
149
|
+
- src/wordcram/WordColorer.java
|
150
|
+
- src/wordcram/WordCounter.java
|
151
|
+
- src/wordcram/WordCram.java
|
152
|
+
- src/wordcram/WordCramEngine.java
|
153
|
+
- src/wordcram/WordFonter.java
|
154
|
+
- src/wordcram/WordNudger.java
|
155
|
+
- src/wordcram/WordPlacer.java
|
156
|
+
- src/wordcram/WordRenderer.java
|
157
|
+
- src/wordcram/WordShaper.java
|
158
|
+
- src/wordcram/WordSizer.java
|
159
|
+
- src/wordcram/WordSkipReason.java
|
160
|
+
- src/wordcram/WordSorterAndScaler.java
|
161
|
+
- src/wordcram/WordSource.java
|
162
|
+
- src/wordcram/text/Html.java
|
163
|
+
- src/wordcram/text/Html2Text.java
|
164
|
+
- src/wordcram/text/Text.java
|
165
|
+
- src/wordcram/text/TextFile.java
|
166
|
+
- src/wordcram/text/TextSource.java
|
167
|
+
- src/wordcram/text/WebPage.java
|
79
168
|
homepage: http://ruby-processing.github.io/WordCram/
|
80
169
|
licenses:
|
81
170
|
- Apache-2.0
|
data/lib/cue.language.jar
DELETED
Binary file
|
data/lib/jsoup-1.7.2.jar
DELETED
Binary file
|
data/vendors/Rakefile
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
require 'rake/clean'
|
2
|
-
|
3
|
-
WARNING = <<-EOS.freeze
|
4
|
-
WARNING: you may not have wget installed
|
5
|
-
|
6
|
-
EOS
|
7
|
-
|
8
|
-
WORDCRAM_VERSION = '1.0.0'
|
9
|
-
CLOBBER.include("wordcram.#{WORDCRAM_VERSION}.zip")
|
10
|
-
|
11
|
-
desc "download, and copy to wordcram/lib"
|
12
|
-
task :default => [:download, :unpack_library, :copy_jars]
|
13
|
-
|
14
|
-
desc "download wordcram upstream sources"
|
15
|
-
task :download => ["wordcram.#{WORDCRAM_VERSION}.zip"]
|
16
|
-
|
17
|
-
file "wordcram.#{WORDCRAM_VERSION}.zip" do
|
18
|
-
begin
|
19
|
-
sh "wget http://wordcram.org/wordcram.#{WORDCRAM_VERSION}.zip"
|
20
|
-
rescue
|
21
|
-
warn(WARNING)
|
22
|
-
end
|
23
|
-
check_sha256("wordcram.#{WORDCRAM_VERSION}.zip", "d6b936db3628806099eba3f309fad81dae7f3db5e2a2846742c959b03eb0d25f")
|
24
|
-
end
|
25
|
-
|
26
|
-
desc "unpack wordcram library"
|
27
|
-
task :unpack_library do
|
28
|
-
sh "unzip wordcram.#{WORDCRAM_VERSION}.zip"
|
29
|
-
end
|
30
|
-
|
31
|
-
directory "../lib"
|
32
|
-
|
33
|
-
desc "copy jars"
|
34
|
-
task :copy_jars => ["../lib"] do
|
35
|
-
sh "cp -v WordCram/library/WordCram.jar ../lib/WordCram.jar"
|
36
|
-
sh "cp -v WordCram/library/cue.language.jar ../lib/cue.language.jar"
|
37
|
-
sh "cp -v WordCram/library/jsoup-1.7.2.jar ../lib/jsoup-1.7.2.jar"
|
38
|
-
end
|
39
|
-
|
40
|
-
def check_sha256(filename, expected_hash)
|
41
|
-
require "digest"
|
42
|
-
sha256 = Digest::SHA256.new
|
43
|
-
File.open(filename, "r") do |f|
|
44
|
-
while buf = f.read(4096)
|
45
|
-
sha256.update(buf)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
if sha256.hexdigest != expected_hash
|
49
|
-
raise "bad sha256 checksum for #{filename} (expected #{expected_hash} got #{sha256.hexdigest})"
|
50
|
-
end
|
51
|
-
end
|