ruby_wordcram 1.0.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.mvn/extensions.xml +8 -0
  4. data/.mvn/wrapper/maven-wrapper.properties +1 -0
  5. data/Rakefile +28 -5
  6. data/docs/_posts/2017-03-07-getting_started.md +3 -2
  7. data/docs/_posts/2017-03-07-under_the_hood.md +33 -0
  8. data/lib/WordCram.jar +0 -0
  9. data/lib/jsoup-1.10.2.jar +0 -0
  10. data/lib/ruby_wordcram/version.rb +1 -1
  11. data/lib/ruby_wordcram.rb +1 -2
  12. data/pom.rb +53 -0
  13. data/pom.xml +87 -0
  14. data/ruby_wordcram.gemspec +1 -2
  15. data/src/cue/lang/Counter.java +141 -0
  16. data/src/cue/lang/IterableText.java +10 -0
  17. data/src/cue/lang/NGramIterator.java +151 -0
  18. data/src/cue/lang/SentenceIterator.java +86 -0
  19. data/src/cue/lang/WordIterator.java +60 -0
  20. data/src/cue/lang/stop/StopWords.java +114 -0
  21. data/src/cue/lang/stop/arabic +351 -0
  22. data/src/cue/lang/stop/armenian +45 -0
  23. data/src/cue/lang/stop/catalan +219 -0
  24. data/src/cue/lang/stop/croatian +2024 -0
  25. data/src/cue/lang/stop/czech +256 -0
  26. data/src/cue/lang/stop/danish +94 -0
  27. data/src/cue/lang/stop/dutch +107 -0
  28. data/src/cue/lang/stop/english +183 -0
  29. data/src/cue/lang/stop/esperanto +180 -0
  30. data/src/cue/lang/stop/farsi +966 -0
  31. data/src/cue/lang/stop/finnish +235 -0
  32. data/src/cue/lang/stop/french +543 -0
  33. data/src/cue/lang/stop/german +231 -0
  34. data/src/cue/lang/stop/greek +637 -0
  35. data/src/cue/lang/stop/hebrew +220 -0
  36. data/src/cue/lang/stop/hindi +97 -0
  37. data/src/cue/lang/stop/hungarian +202 -0
  38. data/src/cue/lang/stop/italian +279 -0
  39. data/src/cue/lang/stop/latin +1 -0
  40. data/src/cue/lang/stop/norwegian +176 -0
  41. data/src/cue/lang/stop/polish +138 -0
  42. data/src/cue/lang/stop/portuguese +204 -0
  43. data/src/cue/lang/stop/romanian +284 -0
  44. data/src/cue/lang/stop/russian +652 -0
  45. data/src/cue/lang/stop/slovak +110 -0
  46. data/src/cue/lang/stop/slovenian +448 -0
  47. data/src/cue/lang/stop/spanish +308 -0
  48. data/src/cue/lang/stop/swedish +114 -0
  49. data/src/cue/lang/stop/turkish +117 -0
  50. data/src/cue/lang/unicode/BlockUtil.java +103 -0
  51. data/src/cue/lang/unicode/Normalizer.java +55 -0
  52. data/src/cue/lang/unicode/Normalizer6.java +32 -0
  53. data/src/license.txt +201 -0
  54. data/src/wordcram/Anglers.java +137 -0
  55. data/src/wordcram/BBTree.java +133 -0
  56. data/src/wordcram/BBTreeBuilder.java +61 -0
  57. data/src/wordcram/Colorers.java +52 -0
  58. data/src/wordcram/EngineWord.java +73 -0
  59. data/src/wordcram/Fonters.java +17 -0
  60. data/src/wordcram/HsbWordColorer.java +28 -0
  61. data/src/wordcram/ImageShaper.java +91 -0
  62. data/src/wordcram/Observer.java +9 -0
  63. data/src/wordcram/PlacerHeatMap.java +134 -0
  64. data/src/wordcram/Placers.java +74 -0
  65. data/src/wordcram/PlottingWordNudger.java +38 -0
  66. data/src/wordcram/PlottingWordPlacer.java +36 -0
  67. data/src/wordcram/ProcessingWordRenderer.java +42 -0
  68. data/src/wordcram/RandomWordNudger.java +44 -0
  69. data/src/wordcram/RenderOptions.java +10 -0
  70. data/src/wordcram/ShapeBasedPlacer.java +66 -0
  71. data/src/wordcram/Sizers.java +54 -0
  72. data/src/wordcram/SketchCallbackObserver.java +70 -0
  73. data/src/wordcram/SpiralWordNudger.java +31 -0
  74. data/src/wordcram/SvgWordRenderer.java +110 -0
  75. data/src/wordcram/SwirlWordPlacer.java +25 -0
  76. data/src/wordcram/UpperLeftWordPlacer.java +27 -0
  77. data/src/wordcram/WaveWordPlacer.java +25 -0
  78. data/src/wordcram/Word.java +357 -0
  79. data/src/wordcram/WordAngler.java +20 -0
  80. data/src/wordcram/WordArray.java +18 -0
  81. data/src/wordcram/WordBag.java +31 -0
  82. data/src/wordcram/WordColorer.java +25 -0
  83. data/src/wordcram/WordCounter.java +96 -0
  84. data/src/wordcram/WordCram.java +920 -0
  85. data/src/wordcram/WordCramEngine.java +196 -0
  86. data/src/wordcram/WordFonter.java +24 -0
  87. data/src/wordcram/WordNudger.java +44 -0
  88. data/src/wordcram/WordPlacer.java +44 -0
  89. data/src/wordcram/WordRenderer.java +10 -0
  90. data/src/wordcram/WordShaper.java +78 -0
  91. data/src/wordcram/WordSizer.java +46 -0
  92. data/src/wordcram/WordSkipReason.java +42 -0
  93. data/src/wordcram/WordSorterAndScaler.java +31 -0
  94. data/src/wordcram/WordSource.java +5 -0
  95. data/src/wordcram/text/Html.java +15 -0
  96. data/src/wordcram/text/Html2Text.java +17 -0
  97. data/src/wordcram/text/Text.java +15 -0
  98. data/src/wordcram/text/TextFile.java +23 -0
  99. data/src/wordcram/text/TextSource.java +5 -0
  100. data/src/wordcram/text/WebPage.java +23 -0
  101. metadata +94 -5
  102. data/lib/cue.language.jar +0 -0
  103. data/lib/jsoup-1.7.2.jar +0 -0
  104. data/vendors/Rakefile +0 -51
@@ -0,0 +1,42 @@
1
+ package wordcram;
2
+
3
/**
 * Reason codes explaining why a Word was skipped rather than drawn. Each
 * constant carries a short human-readable explanation, which
 * {@link #toString()} returns.
 */
public enum WordSkipReason {

    /**
     * Skip Reason: the Word was skipped because the WordCram had already drawn
     * the {@link WordCram#maxNumberOfWordsToDraw(int)}.
     * It's really about the Word's rank, its position in the list once the
     * words are sorted by weight: if its rank is greater than the value passed
     * to maxNumberOfWordsToDraw(), then it'll be skipped, and this will be the
     * reason code.
     */
    WAS_OVER_MAX_NUMBER_OF_WORDS("we already reached the maxNumberOfWordsToDraw threshold"),

    /**
     * Skip Reason: the Word's shape was smaller than
     * {@link WordCram#minShapeSize(int)}.
     * WordCram will only render words so small, for performance reasons. You
     * can set the minimum Word shape size via
     * {@link WordCram#minShapeSize(int)}.
     */
    SHAPE_WAS_TOO_SMALL("it was below the minShapeSize threshold"),

    /**
     * Skip Reason: WordCram tried placing the Word, but it couldn't find a
     * clear spot. The {@link WordNudger} nudged it around a bunch (according
     * to {@link WordCram#maxAttemptsToPlaceWord(int)}, if it was set), but
     * there was just no room.
     */
    NO_SPACE("there wasn't enough space near where you wanted it placed");

    /**
     * Human-readable explanation of this reason. Declared final because enum
     * constants are shared singletons: a reassignment anywhere would change
     * the message for every user of the constant.
     */
    final String reason;

    /**
     * @param reason the explanation returned by {@link #toString()}
     */
    WordSkipReason(String reason) {
        this.reason = reason;
    }

    /**
     * @return the human-readable explanation given to the constructor
     */
    @Override
    public String toString() {
        return reason;
    }
}
@@ -0,0 +1,31 @@
1
+ package wordcram;
2
+
3
+ import java.util.Arrays;
4
+
5
+ class WordSorterAndScaler {
6
+
7
+ public Word[] sortAndScale(Word[] rawWords) {
8
+ if (rawWords.length == 0) {
9
+ return rawWords;
10
+ }
11
+
12
+ Word[] words = copy(rawWords);
13
+ Arrays.sort(words);
14
+ float maxWeight = words[0].weight;
15
+
16
+ for (Word word : words) {
17
+ word.weight = word.weight / maxWeight;
18
+ }
19
+
20
+ return words;
21
+ }
22
+
23
+ private Word[] copy(Word[] rawWords) {
24
+
25
+ // was Arrays.copyOf(rawWords, rawWords.length); - removed for Java 1.5 compatibility.
26
+
27
+ Word[] copy = new Word[rawWords.length];
28
+ System.arraycopy(rawWords, 0, copy, 0, copy.length);
29
+ return copy;
30
+ }
31
+ }
@@ -0,0 +1,5 @@
1
+ package wordcram;
2
+
3
+ public interface WordSource {
4
+ public Word[] getWords();
5
+ }
@@ -0,0 +1,15 @@
1
+ package wordcram.text;
2
+
3
+ public class Html implements TextSource {
4
+
5
+ private final String src;
6
+
7
+ public Html(String htmlSrc) {
8
+ src = htmlSrc;
9
+ }
10
+
11
+ @Override
12
+ public String getText() {
13
+ return new Html2Text().text(src, null);
14
+ }
15
+ }
@@ -0,0 +1,17 @@
1
+ package wordcram.text;
2
+
3
+ import org.jsoup.Jsoup;
4
+ import org.jsoup.nodes.Document;
5
+
6
+ class Html2Text {
7
+ public String text(String html, String cssSelector) {
8
+ Document doc = Jsoup.parse(html);
9
+
10
+ if (cssSelector == null) {
11
+ return doc.text();
12
+ }
13
+ else {
14
+ return doc.select(cssSelector).text();
15
+ }
16
+ }
17
+ }
@@ -0,0 +1,15 @@
1
+ package wordcram.text;
2
+
3
+ public class Text implements TextSource {
4
+
5
+ private final String text;
6
+
7
+ public Text(String _text) {
8
+ text = _text;
9
+ }
10
+
11
+ @Override
12
+ public String getText() {
13
+ return text;
14
+ }
15
+ }
@@ -0,0 +1,23 @@
1
+ package wordcram.text;
2
+
3
+ import processing.core.PApplet;
4
+
5
+ public class TextFile implements TextSource {
6
+
7
+ private final String path;
8
+
9
+ // TODO if we move all .text.* classes into WordCram, we can kill this, and
10
+ // use pkg-local methods for setting the parent...
11
+ private final PApplet parent;
12
+
13
+ public TextFile(String path, PApplet parent) {
14
+ this.path = path;
15
+ this.parent = parent;
16
+ }
17
+
18
+ @Override
19
+ public String getText() {
20
+ return PApplet.join(parent.loadStrings(path), ' ');
21
+ }
22
+
23
+ }
@@ -0,0 +1,5 @@
1
+ package wordcram.text;
2
+
3
/**
 * Something that can supply text as a single string.
 */
public interface TextSource {

    /**
     * @return the text provided by this source
     */
    String getText();
}
@@ -0,0 +1,23 @@
1
+ package wordcram.text;
2
+
3
+ import processing.core.PApplet;
4
+
5
+ public class WebPage implements TextSource {
6
+
7
+ private final String url;
8
+ private final String cssSelector;
9
+ private final PApplet parent;
10
+
11
+ public WebPage(String url, String cssSelector, PApplet parent) {
12
+ this.url = url;
13
+ this.cssSelector = cssSelector;
14
+ this.parent = parent;
15
+ }
16
+
17
+ @Override
18
+ public String getText() {
19
+ String html = PApplet.join(parent.loadStrings(url), ' ');
20
+ return new Html2Text().text(html, cssSelector);
21
+ }
22
+
23
+ }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_wordcram
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dan Bernier
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2017-03-08 00:00:00.000000000 Z
13
+ date: 2017-03-11 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rake
@@ -42,6 +42,8 @@ extra_rdoc_files:
42
42
  - LICENSE
43
43
  files:
44
44
  - ".gitignore"
45
+ - ".mvn/extensions.xml"
46
+ - ".mvn/wrapper/maven-wrapper.properties"
45
47
  - CHANGELOG.md
46
48
  - LICENSE
47
49
  - README.md
@@ -59,6 +61,7 @@ files:
59
61
  - docs/_layouts/page.html
60
62
  - docs/_layouts/post.html
61
63
  - docs/_posts/2017-03-07-getting_started.md
64
+ - docs/_posts/2017-03-07-under_the_hood.md
62
65
  - docs/_sass/_base.scss
63
66
  - docs/_sass/_layout.scss
64
67
  - docs/_sass/_syntax-highlighting.scss
@@ -70,12 +73,98 @@ files:
70
73
  - example/data/MINYN___.TTF
71
74
  - example/test.rb
72
75
  - lib/WordCram.jar
73
- - lib/cue.language.jar
74
- - lib/jsoup-1.7.2.jar
76
+ - lib/jsoup-1.10.2.jar
75
77
  - lib/ruby_wordcram.rb
76
78
  - lib/ruby_wordcram/version.rb
79
+ - pom.rb
80
+ - pom.xml
77
81
  - ruby_wordcram.gemspec
78
- - vendors/Rakefile
82
+ - src/cue/lang/Counter.java
83
+ - src/cue/lang/IterableText.java
84
+ - src/cue/lang/NGramIterator.java
85
+ - src/cue/lang/SentenceIterator.java
86
+ - src/cue/lang/WordIterator.java
87
+ - src/cue/lang/stop/StopWords.java
88
+ - src/cue/lang/stop/arabic
89
+ - src/cue/lang/stop/armenian
90
+ - src/cue/lang/stop/catalan
91
+ - src/cue/lang/stop/croatian
92
+ - src/cue/lang/stop/czech
93
+ - src/cue/lang/stop/danish
94
+ - src/cue/lang/stop/dutch
95
+ - src/cue/lang/stop/english
96
+ - src/cue/lang/stop/esperanto
97
+ - src/cue/lang/stop/farsi
98
+ - src/cue/lang/stop/finnish
99
+ - src/cue/lang/stop/french
100
+ - src/cue/lang/stop/german
101
+ - src/cue/lang/stop/greek
102
+ - src/cue/lang/stop/hebrew
103
+ - src/cue/lang/stop/hindi
104
+ - src/cue/lang/stop/hungarian
105
+ - src/cue/lang/stop/italian
106
+ - src/cue/lang/stop/latin
107
+ - src/cue/lang/stop/norwegian
108
+ - src/cue/lang/stop/polish
109
+ - src/cue/lang/stop/portuguese
110
+ - src/cue/lang/stop/romanian
111
+ - src/cue/lang/stop/russian
112
+ - src/cue/lang/stop/slovak
113
+ - src/cue/lang/stop/slovenian
114
+ - src/cue/lang/stop/spanish
115
+ - src/cue/lang/stop/swedish
116
+ - src/cue/lang/stop/turkish
117
+ - src/cue/lang/unicode/BlockUtil.java
118
+ - src/cue/lang/unicode/Normalizer.java
119
+ - src/cue/lang/unicode/Normalizer6.java
120
+ - src/license.txt
121
+ - src/wordcram/Anglers.java
122
+ - src/wordcram/BBTree.java
123
+ - src/wordcram/BBTreeBuilder.java
124
+ - src/wordcram/Colorers.java
125
+ - src/wordcram/EngineWord.java
126
+ - src/wordcram/Fonters.java
127
+ - src/wordcram/HsbWordColorer.java
128
+ - src/wordcram/ImageShaper.java
129
+ - src/wordcram/Observer.java
130
+ - src/wordcram/PlacerHeatMap.java
131
+ - src/wordcram/Placers.java
132
+ - src/wordcram/PlottingWordNudger.java
133
+ - src/wordcram/PlottingWordPlacer.java
134
+ - src/wordcram/ProcessingWordRenderer.java
135
+ - src/wordcram/RandomWordNudger.java
136
+ - src/wordcram/RenderOptions.java
137
+ - src/wordcram/ShapeBasedPlacer.java
138
+ - src/wordcram/Sizers.java
139
+ - src/wordcram/SketchCallbackObserver.java
140
+ - src/wordcram/SpiralWordNudger.java
141
+ - src/wordcram/SvgWordRenderer.java
142
+ - src/wordcram/SwirlWordPlacer.java
143
+ - src/wordcram/UpperLeftWordPlacer.java
144
+ - src/wordcram/WaveWordPlacer.java
145
+ - src/wordcram/Word.java
146
+ - src/wordcram/WordAngler.java
147
+ - src/wordcram/WordArray.java
148
+ - src/wordcram/WordBag.java
149
+ - src/wordcram/WordColorer.java
150
+ - src/wordcram/WordCounter.java
151
+ - src/wordcram/WordCram.java
152
+ - src/wordcram/WordCramEngine.java
153
+ - src/wordcram/WordFonter.java
154
+ - src/wordcram/WordNudger.java
155
+ - src/wordcram/WordPlacer.java
156
+ - src/wordcram/WordRenderer.java
157
+ - src/wordcram/WordShaper.java
158
+ - src/wordcram/WordSizer.java
159
+ - src/wordcram/WordSkipReason.java
160
+ - src/wordcram/WordSorterAndScaler.java
161
+ - src/wordcram/WordSource.java
162
+ - src/wordcram/text/Html.java
163
+ - src/wordcram/text/Html2Text.java
164
+ - src/wordcram/text/Text.java
165
+ - src/wordcram/text/TextFile.java
166
+ - src/wordcram/text/TextSource.java
167
+ - src/wordcram/text/WebPage.java
79
168
  homepage: http://ruby-processing.github.io/WordCram/
80
169
  licenses:
81
170
  - Apache-2.0
data/lib/cue.language.jar DELETED
Binary file
data/lib/jsoup-1.7.2.jar DELETED
Binary file
data/vendors/Rakefile DELETED
@@ -1,51 +0,0 @@
1
require 'rake/clean'

# Printed when the download command in the file task below fails.
WARNING = <<-EOS.freeze
WARNING: you may not have wget installed

EOS

WORDCRAM_VERSION = '1.0.0'

# Name of the upstream archive; a file-local variable shared by the tasks below.
wordcram_zip = "wordcram.#{WORDCRAM_VERSION}.zip"
CLOBBER.include(wordcram_zip)

desc "download, and copy to wordcram/lib"
task default: %i[download unpack_library copy_jars]

desc "download wordcram upstream sources"
task download: [wordcram_zip]

# Fetch the archive, then verify it against the pinned SHA-256 digest.
file wordcram_zip do
  begin
    sh "wget http://wordcram.org/#{wordcram_zip}"
  rescue
    # The download is best-effort here; the checksum step below still runs.
    warn(WARNING)
  end
  check_sha256(wordcram_zip, "d6b936db3628806099eba3f309fad81dae7f3db5e2a2846742c959b03eb0d25f")
end

desc "unpack wordcram library"
task :unpack_library do
  sh "unzip #{wordcram_zip}"
end

directory "../lib"

desc "copy jars"
task copy_jars: ["../lib"] do
  # Same three copies as before, issued in the same order.
  %w[WordCram.jar cue.language.jar jsoup-1.7.2.jar].each do |jar|
    sh "cp -v WordCram/library/#{jar} ../lib/#{jar}"
  end
end
39
-
40
# Verify that a file has the expected SHA-256 digest.
#
# @param filename [String] path of the file to hash
# @param expected_hash [String] expected digest as lowercase hex
# @raise [RuntimeError] when the computed digest differs from +expected_hash+
# @return [nil] when the digest matches
def check_sha256(filename, expected_hash)
  require "digest"

  # Digest::SHA256.file reads the file in binary mode, so the bytes of a
  # binary archive are hashed exactly as stored on every platform.
  actual_hash = Digest::SHA256.file(filename).hexdigest
  return if actual_hash == expected_hash

  raise "bad sha256 checksum for #{filename} (expected #{expected_hash} got #{actual_hash})"
end