lda-ruby 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/CHANGELOG +22 -0
- data/README +21 -0
- data/README.markdown +38 -0
- data/Rakefile +58 -0
- data/VERSION.yml +4 -0
- data/ext/lda-ruby/Makefile +181 -0
- data/ext/lda-ruby/cokus.c +145 -0
- data/ext/lda-ruby/cokus.h +27 -0
- data/ext/lda-ruby/extconf.rb +9 -0
- data/ext/lda-ruby/lda-alpha.c +96 -0
- data/ext/lda-ruby/lda-alpha.h +21 -0
- data/ext/lda-ruby/lda-data.c +67 -0
- data/ext/lda-ruby/lda-data.h +14 -0
- data/ext/lda-ruby/lda-inference.c +1007 -0
- data/ext/lda-ruby/lda-inference.h +63 -0
- data/ext/lda-ruby/lda-model.c +345 -0
- data/ext/lda-ruby/lda-model.h +29 -0
- data/ext/lda-ruby/lda.h +54 -0
- data/ext/lda-ruby/utils.c +111 -0
- data/ext/lda-ruby/utils.h +18 -0
- data/lda-ruby.gemspec +78 -0
- data/lib/lda-ruby.rb +168 -0
- data/lib/lda-ruby/corpus/corpus.rb +34 -0
- data/lib/lda-ruby/corpus/data_corpus.rb +22 -0
- data/lib/lda-ruby/corpus/directory_corpus.rb +25 -0
- data/lib/lda-ruby/corpus/text_corpus.rb +22 -0
- data/lib/lda-ruby/document/data_document.rb +30 -0
- data/lib/lda-ruby/document/document.rb +36 -0
- data/lib/lda-ruby/document/text_document.rb +37 -0
- data/lib/lda-ruby/vocabulary.rb +46 -0
- data/license.txt +504 -0
- data/test/data/.gitignore +2 -0
- data/test/data/docs.dat +46 -0
- data/test/data/wiki-test-docs.yml +123 -0
- data/test/lda_ruby_test.rb +274 -0
- data/test/test_helper.rb +10 -0
- metadata +95 -0
data/CHANGELOG
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Version 0.3.1
|
2
|
+
=============
|
3
|
+
|
4
|
+
- top_words method now returns actual words if they exist in the vocabulary
|
5
|
+
|
6
|
+
Version 0.3.0
|
7
|
+
=============
|
8
|
+
|
9
|
+
- Completely broke backwards compatibility
|
10
|
+
- Reworked many classes to make functionality more reasonable
|
11
|
+
- Added ability to load documents from text files
|
12
|
+
|
13
|
+
Version 0.2.3
|
14
|
+
=============
|
15
|
+
|
16
|
+
- Bug fixes by Todd Foster
|
17
|
+
|
18
|
+
|
19
|
+
Version 0.2.2
|
20
|
+
=============
|
21
|
+
|
22
|
+
- First stable release
|
data/README
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Latent Dirichlet Allocation – Ruby Wrapper
|
2
|
+
|
3
|
+
This wrapper is based on C code by David M. Blei. In a nutshell, it can be used to automatically cluster documents into topics. The number of topics is chosen beforehand, and the topics found are usually fairly intuitive. Details of the implementation can be found in the paper by Blei, Ng, and Jordan.
|
4
|
+
|
5
|
+
The original C code relied on files for the input and output. We felt it was necessary to depart from that model and use Ruby objects for these steps instead. The only file necessary will be the data file (in a format similar to that used by SVMlight). Optionally you may need a vocabulary file to be able to extract the words belonging to topics.
|
6
|
+
|
7
|
+
Example usage:
|
8
|
+
|
9
|
+
require 'lda-ruby'
|
10
|
+
corpus = Lda::DataCorpus.new("data/data_file.dat")
|
11
|
+
lda = Lda::Lda.new(corpus) # create an Lda object for training
|
12
|
+
lda.em("random") # run EM algorithm using random starting points
|
13
|
+
lda.load_vocabulary("data/vocab.txt")
|
14
|
+
lda.print_topics(20) # print the top 20 words per topic
|
15
|
+
|
16
|
+
You can check out the mailing list for this project if you have any questions or mail lda-ruby@groups.google.com [email link]. If you have general questions about Latent Dirichlet Allocation, I urge you to use the topic models mailing list, since the people who monitor that are very knowledgeable.
|
17
|
+
|
18
|
+
|
19
|
+
References
|
20
|
+
|
21
|
+
Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent dirichlet allocation. Journal of Machine Learning Research. 3 (Mar. 2003), 993-1022.
|
data/README.markdown
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# Latent Dirichlet Allocation – Ruby Wrapper
|
2
|
+
|
3
|
+
## What is LDA-Ruby?
|
4
|
+
|
5
|
+
This wrapper is based on C code by David M. Blei. In a nutshell, it can be used to automatically cluster documents into topics. The number of topics is chosen beforehand, and the topics found are usually fairly intuitive. Details of the implementation can be found in the paper by Blei, Ng, and Jordan.
|
6
|
+
|
7
|
+
The original C code relied on files for the input and output. We felt it was necessary to depart from that model and use Ruby objects for these steps instead. The only file necessary will be the data file (in a format similar to that used by [SVMlight][svmlight]). Optionally you may need a vocabulary file to be able to extract the words belonging to topics.
|
8
|
+
|
9
|
+
### Example usage:
|
10
|
+
|
11
|
+
require 'lda-ruby'
|
12
|
+
corpus = Lda::DataCorpus.new("data/data_file.dat")
|
13
|
+
lda = Lda::Lda.new(corpus) # create an Lda object for training
|
14
|
+
lda.em("random") # run EM algorithm using random starting points
|
15
|
+
lda.load_vocabulary("data/vocab.txt")
|
16
|
+
lda.print_topics(20) # print the top 20 words per topic
|
17
|
+
|
18
|
+
You can check out the mailing list for this project if you have any questions or mail lda-ruby@groups.google.com [email link]. If you have general questions about Latent Dirichlet Allocation, I urge you to use the [topic models mailing list][topic-models], since the people who monitor that are very knowledgeable.
|
19
|
+
|
20
|
+
## Resources
|
21
|
+
|
22
|
+
+ [Blog post about LDA-Ruby][lda-ruby]
|
23
|
+
+ [David Blei's lda-c code][blei]
|
24
|
+
+ [Wikipedia article on LDA][wikipedia]
|
25
|
+
+ [Sample AP data][ap-data]
|
26
|
+
|
27
|
+
|
28
|
+
## References
|
29
|
+
|
30
|
+
Blei, David M., Ng, Andrew Y., and Jordan, Michael I. 2003. Latent dirichlet allocation. Journal of Machine Learning Research. 3 (Mar. 2003), 993-1022 [[pdf][pdf]].
|
31
|
+
|
32
|
+
[svmlight]: http://svmlight.joachims.org
|
33
|
+
[lda-ruby]: http://mendicantbug.com/2008/11/17/lda-in-ruby/
|
34
|
+
[blei]: http://www.cs.princeton.edu/~blei/lda-c/
|
35
|
+
[wikipedia]: http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation
|
36
|
+
[ap-data]: http://www.cs.princeton.edu/~blei/lda-c/ap.tgz
|
37
|
+
[pdf]: http://www.cs.princeton.edu/picasso/mats/BleiNgJordan2003_blei.pdf
|
38
|
+
[topic-models]: https://lists.cs.princeton.edu/mailman/listinfo/topic-models
|
data/Rakefile
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Rake tasks for the lda-ruby gem: packaging (Jeweler), tests, coverage (RCov)
# and RDoc generation.
require 'rubygems'
require 'rake'
require 'yaml' # the rdoc task below parses VERSION.yml with YAML.load

# Gem packaging. Jeweler is optional: if it cannot be loaded we only print a
# hint, so the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "lda-ruby"
    gem.summary = %Q{Ruby port of Latent Dirichlet Allocation by David M. Blei.}
    gem.description = %Q{Ruby port of Latent Dirichlet Allocation by David M. Blei. See http://www.cs.princeton.edu/~blei/lda-c/.}
    gem.email = "jasonmadams@gmail.com"
    gem.homepage = "http://github.com/ealdent/lda-ruby"
    gem.authors = ['David Blei', 'Jason Adams']
    gem.extensions = ['ext/lda-ruby/extconf.rb']
    gem.require_paths = ['lib', 'ext']
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: every test/**/*_test.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage. When RCov is missing, define a stub :rcov task that aborts with
# install instructions instead of failing at load time.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :default => :test

# API documentation. The title carries the version read from VERSION.yml when
# that file exists, and an empty version string otherwise.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  if File.exist?('VERSION.yml')
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "lda-ruby #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
58
|
+
|
data/VERSION.yml
ADDED
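[The 4-line VERSION.yml hunk is missing from this capture; the 181-line listing that follows is data/ext/lda-ruby/Makefile.]
data/ext/lda-ruby/Makefile
ADDED
@@ -0,0 +1,181 @@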
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = .
|
7
|
+
topdir = /home/taf2/.local/include/ruby-1.9.1
|
8
|
+
hdrdir = /home/taf2/.local/include/ruby-1.9.1
|
9
|
+
arch_hdrdir = /home/taf2/.local/include/ruby-1.9.1/$(arch)
|
10
|
+
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
11
|
+
prefix = $(DESTDIR)/home/taf2/.local
|
12
|
+
exec_prefix = $(prefix)
|
13
|
+
vendorhdrdir = $(rubyhdrdir)/vendor_ruby
|
14
|
+
sitehdrdir = $(rubyhdrdir)/site_ruby
|
15
|
+
rubyhdrdir = $(includedir)/$(RUBY_INSTALL_NAME)-$(ruby_version)
|
16
|
+
vendordir = $(libdir)/$(RUBY_INSTALL_NAME)/vendor_ruby
|
17
|
+
sitedir = $(libdir)/$(RUBY_INSTALL_NAME)/site_ruby
|
18
|
+
mandir = $(datarootdir)/man
|
19
|
+
localedir = $(datarootdir)/locale
|
20
|
+
libdir = $(exec_prefix)/lib
|
21
|
+
psdir = $(docdir)
|
22
|
+
pdfdir = $(docdir)
|
23
|
+
dvidir = $(docdir)
|
24
|
+
htmldir = $(docdir)
|
25
|
+
infodir = $(datarootdir)/info
|
26
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
27
|
+
oldincludedir = $(DESTDIR)/usr/include
|
28
|
+
includedir = $(prefix)/include
|
29
|
+
localstatedir = $(prefix)/var
|
30
|
+
sharedstatedir = $(prefix)/com
|
31
|
+
sysconfdir = $(prefix)/etc
|
32
|
+
datadir = $(datarootdir)
|
33
|
+
datarootdir = $(prefix)/share
|
34
|
+
libexecdir = $(exec_prefix)/libexec
|
35
|
+
sbindir = $(exec_prefix)/sbin
|
36
|
+
bindir = $(exec_prefix)/bin
|
37
|
+
rubylibdir = $(libdir)/$(ruby_install_name)/$(ruby_version)
|
38
|
+
archdir = $(rubylibdir)/$(arch)
|
39
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
40
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
41
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
42
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
43
|
+
|
44
|
+
CC = gcc
|
45
|
+
CXX = g++
|
46
|
+
LIBRUBY = $(LIBRUBY_SO)
|
47
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
48
|
+
LIBRUBYARG_SHARED = -Wl,-R -Wl,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)
|
49
|
+
LIBRUBYARG_STATIC = -Wl,-R -Wl,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)-static
|
50
|
+
OUTFLAG = -o
|
51
|
+
COUTFLAG = -o
|
52
|
+
|
53
|
+
RUBY_EXTCONF_H =
|
54
|
+
cflags = $(optflags) $(debugflags) $(warnflags)
|
55
|
+
optflags = -O0
|
56
|
+
debugflags = -g3 -ggdb
|
57
|
+
warnflags = -Wall -Wno-parentheses
|
58
|
+
CFLAGS = -fPIC $(cflags) -fPIC -Wall -ggdb -O0
|
59
|
+
INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
|
60
|
+
DEFS =
|
61
|
+
CPPFLAGS = -D USE_RUBY $(DEFS) $(cppflags)
|
62
|
+
CXXFLAGS = $(CFLAGS) $(cxxflags)
|
63
|
+
ldflags = -L. -rdynamic -Wl,-export-dynamic
|
64
|
+
dldflags =
|
65
|
+
archflag =
|
66
|
+
DLDFLAGS = $(ldflags) $(dldflags) $(archflag)
|
67
|
+
LDSHARED = $(CC) -shared
|
68
|
+
LDSHAREDXX = $(CXX) -shared
|
69
|
+
AR = ar
|
70
|
+
EXEEXT =
|
71
|
+
|
72
|
+
RUBY_INSTALL_NAME = ruby
|
73
|
+
RUBY_SO_NAME = ruby
|
74
|
+
arch = x86_64-linux
|
75
|
+
sitearch = x86_64-linux
|
76
|
+
ruby_version = 1.9.1
|
77
|
+
ruby = /home/taf2/.local/bin/ruby
|
78
|
+
RUBY = $(ruby)
|
79
|
+
RM = rm -f
|
80
|
+
RM_RF = $(RUBY) -run -e rm -- -rf
|
81
|
+
RMDIRS = $(RUBY) -run -e rmdir -- -p
|
82
|
+
MAKEDIRS = mkdir -p
|
83
|
+
INSTALL = /usr/bin/install -c
|
84
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
85
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
86
|
+
COPY = cp
|
87
|
+
|
88
|
+
#### End of system configuration section. ####
|
89
|
+
|
90
|
+
preload =
|
91
|
+
|
92
|
+
libpath = . $(libdir)
|
93
|
+
LIBPATH = -L. -L$(libdir) -Wl,-R$(libdir)
|
94
|
+
DEFFILE =
|
95
|
+
|
96
|
+
CLEANFILES = mkmf.log
|
97
|
+
DISTCLEANFILES =
|
98
|
+
DISTCLEANDIRS =
|
99
|
+
|
100
|
+
extout =
|
101
|
+
extout_prefix =
|
102
|
+
target_prefix =
|
103
|
+
LOCAL_LIBS =
|
104
|
+
LIBS = $(LIBRUBYARG_SHARED) -lpthread -lrt -ldl -lcrypt -lm -lc
|
105
|
+
SRCS = lda-model.c lda-data.c utils.c lda-alpha.c cokus.c lda-inference.c
|
106
|
+
OBJS = lda-model.o lda-data.o utils.o lda-alpha.o cokus.o lda-inference.o
|
107
|
+
TARGET = lda_ext
|
108
|
+
DLLIB = $(TARGET).so
|
109
|
+
EXTSTATIC =
|
110
|
+
STATIC_LIB =
|
111
|
+
|
112
|
+
BINDIR = $(bindir)
|
113
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
114
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
115
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
116
|
+
HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
|
117
|
+
ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
|
118
|
+
|
119
|
+
TARGET_SO = $(DLLIB)
|
120
|
+
CLEANLIBS = $(TARGET).so
|
121
|
+
CLEANOBJS = *.o *.bak
|
122
|
+
|
123
|
+
all: $(DLLIB)
|
124
|
+
static: $(STATIC_LIB)
|
125
|
+
|
126
|
+
clean-rb-default::
|
127
|
+
clean-rb::
|
128
|
+
clean-so::
|
129
|
+
clean: clean-so clean-rb-default clean-rb
|
130
|
+
@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
|
131
|
+
|
132
|
+
distclean-rb-default::
|
133
|
+
distclean-rb::
|
134
|
+
distclean-so::
|
135
|
+
distclean: clean distclean-so distclean-rb-default distclean-rb
|
136
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
137
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
138
|
+
@-$(RMDIRS) $(DISTCLEANDIRS)
|
139
|
+
|
140
|
+
realclean: distclean
|
141
|
+
install: install-so install-rb
|
142
|
+
|
143
|
+
install-so: $(RUBYARCHDIR)
|
144
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
145
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
146
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
147
|
+
install-rb: pre-install-rb install-rb-default
|
148
|
+
install-rb-default: pre-install-rb-default
|
149
|
+
pre-install-rb: Makefile
|
150
|
+
pre-install-rb-default: Makefile
|
151
|
+
$(RUBYARCHDIR):
|
152
|
+
$(MAKEDIRS) $@
|
153
|
+
|
154
|
+
site-install: site-install-so site-install-rb
|
155
|
+
site-install-so: install-so
|
156
|
+
site-install-rb: install-rb
|
157
|
+
|
158
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .C .o
|
159
|
+
|
160
|
+
.cc.o:
|
161
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
162
|
+
|
163
|
+
.cxx.o:
|
164
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
165
|
+
|
166
|
+
.cpp.o:
|
167
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
168
|
+
|
169
|
+
.C.o:
|
170
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
171
|
+
|
172
|
+
.c.o:
|
173
|
+
$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
|
174
|
+
|
175
|
+
$(DLLIB): $(OBJS) Makefile
|
176
|
+
@-$(RM) $(@)
|
177
|
+
$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
178
|
+
|
179
|
+
|
180
|
+
|
181
|
+
$(OBJS): $(hdrdir)/ruby.h $(hdrdir)/ruby/defines.h $(arch_hdrdir)/ruby/config.h
|
data/ext/lda-ruby/cokus.c
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
// This is the ``Mersenne Twister'' random number generator MT19937, which
|
2
|
+
// generates pseudorandom integers uniformly distributed in 0..(2^32 - 1)
|
3
|
+
// starting from any odd seed in 0..(2^32 - 1). This version is a recode
|
4
|
+
// by Shawn Cokus (Cokus@math.washington.edu) on March 8, 1998 of a version by
|
5
|
+
// Takuji Nishimura (who had suggestions from Topher Cooper and Marc Rieffel in
|
6
|
+
// July-August 1997).
|
7
|
+
//
|
8
|
+
// Effectiveness of the recoding (on Goedel2.math.washington.edu, a DEC Alpha
|
9
|
+
// running OSF/1) using GCC -O3 as a compiler: before recoding: 51.6 sec. to
|
10
|
+
// generate 300 million random numbers; after recoding: 24.0 sec. for the same
|
11
|
+
// (i.e., 46.5% of original time), so speed is now about 12.5 million random
|
12
|
+
// number generations per second on this machine.
|
13
|
+
//
|
14
|
+
// According to the URL <http://www.math.keio.ac.jp/~matumoto/emt.html>
|
15
|
+
// (and paraphrasing a bit in places), the Mersenne Twister is ``designed
|
16
|
+
// with consideration of the flaws of various existing generators,'' has
|
17
|
+
// a period of 2^19937 - 1, gives a sequence that is 623-dimensionally
|
18
|
+
// equidistributed, and ``has passed many stringent tests, including the
|
19
|
+
// die-hard test of G. Marsaglia and the load test of P. Hellekalek and
|
20
|
+
// S. Wegenkittl.'' It is efficient in memory usage (typically using 2506
|
21
|
+
// to 5012 bytes of static data, depending on data type sizes, and the code
|
22
|
+
// is quite short as well). It generates random numbers in batches of 624
|
23
|
+
// at a time, so the caching and pipelining of modern systems is exploited.
|
24
|
+
// It is also divide- and mod-free.
|
25
|
+
//
|
26
|
+
// This library is free software; you can redistribute it and/or modify it
|
27
|
+
// under the terms of the GNU Library General Public License as published by
|
28
|
+
// the Free Software Foundation (either version 2 of the License or, at your
|
29
|
+
// option, any later version). This library is distributed in the hope that
|
30
|
+
// it will be useful, but WITHOUT ANY WARRANTY, without even the implied
|
31
|
+
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
|
32
|
+
// the GNU Library General Public License for more details. You should have
|
33
|
+
// received a copy of the GNU Library General Public License along with this
|
34
|
+
// library; if not, write to the Free Software Foundation, Inc., 59 Temple
|
35
|
+
// Place, Suite 330, Boston, MA 02111-1307, USA.
|
36
|
+
//
|
37
|
+
// The code as Shawn received it included the following notice:
|
38
|
+
//
|
39
|
+
// Copyright (C) 1997 Makoto Matsumoto and Takuji Nishimura. When
|
40
|
+
// you use this, send an e-mail to <matumoto@math.keio.ac.jp> with
|
41
|
+
// an appropriate reference to your work.
|
42
|
+
//
|
43
|
+
// It would be nice to CC: <Cokus@math.washington.edu> when you write.
|
44
|
+
//
|
45
|
+
|
46
|
+
#include "cokus.h"
|
47
|
+
|
48
|
+
// Generator state shared by seedMT(), reloadMT() and randomMT().

// The N-word state vector, plus one extra slot so that reloadMT()'s
// look-ahead pointer never reads past the array (keeps the code ANSI C clean).
static uint32 state[N+1];

// Points at the state word from which the next random value is computed.
static uint32 *next;

// How many more times *next++ may be consumed before the state must be
// reloaded. Starts at -1, which reloadMT() (after randomMT()'s decrement)
// detects as "never seeded".
static int left = -1;
|
51
|
+
|
52
|
+
// Seed the generator: fill state[0..(N-1)] from `seed` and mark the state as
// exhausted (left = 0) so the next randomMT() call triggers reloadMT().
//
// The fill uses the linear congruential generator
//
//     x_new = (69069 * x_old) mod 2^32
//
// (Line 15 of Table 1, p. 106, Sec. 3.3.4 of Knuth, _The Art of Computer
// Programming_, Volume 2, 3rd ed.).
//
// Notes (SJC): the initial-state requirements of the Mersenne Twister are
// not known to me, and this seeding generator could probably be better. It
// reaches the maximum period for its modulus (2^30) iff x_initial is odd
// (p. 20-21, Sec. 3.2.1.2, Knuth). Even starting values yield degenerate
// sequences such as 0, 0, 0, ...; 2^31, 2^31, ...; 2^30, 2^30, ...;
// 2^29, 2^29 + 2^31, 2^29, 2^29 + 2^31, ...; hence the seed is forced odd.
//
// Even for odd x_initial the low bits are weak. If x_initial is 1 mod 4:
//
//     the lowest bit of x is always 1,
//     the next-to-lowest bit of x is always 0,
//     the 2nd-from-lowest bit alternates      ... 0 1 0 1 0 1 0 1 ... ,
//     the 3rd-from-lowest bit 4-cycles        ... 0 1 1 0 0 1 1 0 ... ,
//     the 4th-from-lowest bit has the 8-cycle ... 0 0 0 1 1 1 1 0 ... ,
//     ...
//
// and if x_initial is 3 mod 4:
//
//     the lowest bit of x is always 1,
//     the next-to-lowest bit of x is always 1,
//     the 2nd-from-lowest bit alternates      ... 0 1 0 1 0 1 0 1 ... ,
//     the 3rd-from-lowest bit 4-cycles        ... 0 0 1 1 0 0 1 1 ... ,
//     the 4th-from-lowest bit has the 8-cycle ... 0 0 1 1 1 1 0 0 ... ,
//     ...
//
// The generator's potency (min. s>=0 with (69069-1)^s = 0 mod 2^32) is 16,
// which seems acceptable by p. 25, Sec. 3.2.1.3 of Knuth. It also does well
// in the dimension 2..5 spectral tests, but could be better in dimension 6
// (Line 15, Table 1, p. 106, Sec. 3.3.4, Knuth).
//
// Callers never see these values directly because reloadMT() always munges
// them first, so possibly none of this matters; the values might even be
// exactly what the Mersenne Twister theory wants. That is why the only
// change made here is the restriction to odd seeds.
void seedMT(uint32 seed)
{
    uint32 value = (seed | 1U) & 0xFFFFFFFFU;  // force odd, keep 32 bits
    int idx;

    left = 0;
    state[0] = value;

    // Each remaining word is the previous value times 69069, stored
    // truncated to 32 bits.
    for (idx = 1; idx < N; idx++) {
        value *= 69069U;
        state[idx] = value & 0xFFFFFFFFU;
    }
}
|
106
|
+
|
107
|
+
|
108
|
+
uint32 reloadMT(void)
|
109
|
+
{
|
110
|
+
register uint32 *p0=state, *p2=state+2, *pM=state+M, s0, s1;
|
111
|
+
register int j;
|
112
|
+
|
113
|
+
if(left < -1)
|
114
|
+
seedMT(4357U);
|
115
|
+
|
116
|
+
left=N-1, next=state+1;
|
117
|
+
|
118
|
+
for(s0=state[0], s1=state[1], j=N-M+1; --j; s0=s1, s1=*p2++)
|
119
|
+
*p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
|
120
|
+
|
121
|
+
for(pM=state, j=M; --j; s0=s1, s1=*p2++)
|
122
|
+
*p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
|
123
|
+
|
124
|
+
s1=state[0], *p0 = *pM ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U);
|
125
|
+
s1 ^= (s1 >> 11);
|
126
|
+
s1 ^= (s1 << 7) & 0x9D2C5680U;
|
127
|
+
s1 ^= (s1 << 15) & 0xEFC60000U;
|
128
|
+
return(s1 ^ (s1 >> 18));
|
129
|
+
}
|
130
|
+
|
131
|
+
uint32 randomMT(void)
|
132
|
+
{
|
133
|
+
uint32 y;
|
134
|
+
|
135
|
+
if(--left < 0)
|
136
|
+
return(reloadMT());
|
137
|
+
|
138
|
+
y = *next++;
|
139
|
+
y ^= (y >> 11);
|
140
|
+
y ^= (y << 7) & 0x9D2C5680U;
|
141
|
+
y ^= (y << 15) & 0xEFC60000U;
|
142
|
+
y ^= (y >> 18);
|
143
|
+
return(y);
|
144
|
+
}
|
145
|
+
|