opener-opinion-detector-base 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +101 -0
- data/bin/opinion-detector-base +19 -0
- data/core/annotation.cfg.erb +9 -0
- data/core/packages/KafNafParser-1.4.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
- data/core/python-scripts/LICENSE +339 -0
- data/core/python-scripts/README.md +226 -0
- data/core/python-scripts/classify_kaf_naf_file.py +499 -0
- data/core/python-scripts/cross_validation.py +634 -0
- data/core/python-scripts/generate_folds.py +134 -0
- data/core/python-scripts/models.cfg +10 -0
- data/core/python-scripts/my_templates/README +33 -0
- data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
- data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
- data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
- data/core/python-scripts/my_templates/templates_exp.txt +10 -0
- data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
- data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
- data/core/python-scripts/my_templates/templates_holder.txt +10 -0
- data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
- data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
- data/core/python-scripts/my_templates/templates_target.txt +10 -0
- data/core/python-scripts/run_all_experiments.sh +49 -0
- data/core/python-scripts/run_basic.py +20 -0
- data/core/python-scripts/run_experiment.sh +42 -0
- data/core/python-scripts/scripts/__init__.py +1 -0
- data/core/python-scripts/scripts/config_manager.py +314 -0
- data/core/python-scripts/scripts/crfutils.py +215 -0
- data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
- data/core/python-scripts/scripts/extract_features.py +376 -0
- data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
- data/core/python-scripts/scripts/lexicons.py +44 -0
- data/core/python-scripts/scripts/link_entities_distance.py +77 -0
- data/core/python-scripts/scripts/relation_classifier.py +250 -0
- data/core/python-scripts/train.py +566 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
- data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
- data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
- data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
- data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
- data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
- data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
- data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
- data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
- data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
- data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
- data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
- data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
- data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
- data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/core/vendor/src/crfsuite/AUTHORS +1 -0
- data/core/vendor/src/crfsuite/COPYING +27 -0
- data/core/vendor/src/crfsuite/ChangeLog +103 -0
- data/core/vendor/src/crfsuite/INSTALL +236 -0
- data/core/vendor/src/crfsuite/Makefile.am +19 -0
- data/core/vendor/src/crfsuite/Makefile.in +783 -0
- data/core/vendor/src/crfsuite/README +183 -0
- data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
- data/core/vendor/src/crfsuite/autogen.sh +38 -0
- data/core/vendor/src/crfsuite/compile +143 -0
- data/core/vendor/src/crfsuite/config.guess +1502 -0
- data/core/vendor/src/crfsuite/config.h.in +198 -0
- data/core/vendor/src/crfsuite/config.sub +1714 -0
- data/core/vendor/src/crfsuite/configure +14273 -0
- data/core/vendor/src/crfsuite/configure.in +149 -0
- data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
- data/core/vendor/src/crfsuite/depcomp +630 -0
- data/core/vendor/src/crfsuite/example/chunking.py +49 -0
- data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
- data/core/vendor/src/crfsuite/example/ner.py +270 -0
- data/core/vendor/src/crfsuite/example/pos.py +78 -0
- data/core/vendor/src/crfsuite/example/template.py +88 -0
- data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
- data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
- data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
- data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
- data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
- data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
- data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
- data/core/vendor/src/crfsuite/frontend/main.c +137 -0
- data/core/vendor/src/crfsuite/frontend/option.c +93 -0
- data/core/vendor/src/crfsuite/frontend/option.h +86 -0
- data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
- data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
- data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
- data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
- data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
- data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
- data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
- data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
- data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
- data/core/vendor/src/crfsuite/include/os.h +61 -0
- data/core/vendor/src/crfsuite/install-sh +520 -0
- data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
- data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
- data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
- data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
- data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
- data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
- data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
- data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
- data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
- data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
- data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
- data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
- data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
- data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
- data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
- data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
- data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
- data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
- data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
- data/core/vendor/src/crfsuite/missing +376 -0
- data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
- data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
- data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
- data/core/vendor/src/crfsuite/swig/export.i +32 -0
- data/core/vendor/src/crfsuite/swig/python/README +92 -0
- data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
- data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
- data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
- data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
- data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
- data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
- data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
- data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
- data/core/vendor/src/liblbfgs/AUTHORS +1 -0
- data/core/vendor/src/liblbfgs/COPYING +22 -0
- data/core/vendor/src/liblbfgs/ChangeLog +120 -0
- data/core/vendor/src/liblbfgs/INSTALL +231 -0
- data/core/vendor/src/liblbfgs/Makefile.am +10 -0
- data/core/vendor/src/liblbfgs/Makefile.in +638 -0
- data/core/vendor/src/liblbfgs/NEWS +0 -0
- data/core/vendor/src/liblbfgs/README +71 -0
- data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
- data/core/vendor/src/liblbfgs/autogen.sh +38 -0
- data/core/vendor/src/liblbfgs/config.guess +1411 -0
- data/core/vendor/src/liblbfgs/config.h.in +64 -0
- data/core/vendor/src/liblbfgs/config.sub +1500 -0
- data/core/vendor/src/liblbfgs/configure +21146 -0
- data/core/vendor/src/liblbfgs/configure.in +107 -0
- data/core/vendor/src/liblbfgs/depcomp +522 -0
- data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
- data/core/vendor/src/liblbfgs/install-sh +322 -0
- data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
- data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
- data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
- data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
- data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
- data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
- data/core/vendor/src/liblbfgs/missing +353 -0
- data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
- data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
- data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
- data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
- data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
- data/core/vendor/src/svm_light/LICENSE.txt +59 -0
- data/core/vendor/src/svm_light/Makefile +105 -0
- data/core/vendor/src/svm_light/kernel.h +40 -0
- data/core/vendor/src/svm_light/svm_classify.c +197 -0
- data/core/vendor/src/svm_light/svm_common.c +985 -0
- data/core/vendor/src/svm_light/svm_common.h +301 -0
- data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
- data/core/vendor/src/svm_light/svm_learn.c +4147 -0
- data/core/vendor/src/svm_light/svm_learn.h +169 -0
- data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
- data/core/vendor/src/svm_light/svm_loqo.c +211 -0
- data/ext/hack/Rakefile +17 -0
- data/ext/hack/support.rb +88 -0
- data/lib/opener/opinion_detectors/base.rb +112 -0
- data/lib/opener/opinion_detectors/base/version.rb +7 -0
- data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
- data/lib/opener/opinion_detectors/de.rb +7 -0
- data/lib/opener/opinion_detectors/en.rb +7 -0
- data/lib/opener/opinion_detectors/it.rb +7 -0
- data/lib/opener/opinion_detectors/nl.rb +6 -0
- data/opener-opinion-detector-base.gemspec +35 -0
- data/pre_build_requirements.txt +3 -0
- metadata +374 -0
@@ -0,0 +1,352 @@
|
|
1
|
+
/*
|
2
|
+
* CRF1d feature generator (dyad features).
|
3
|
+
*
|
4
|
+
* Copyright (c) 2007-2010, Naoaki Okazaki
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
* * Neither the names of the authors nor the names of its contributors
|
15
|
+
* may be used to endorse or promote products derived from this
|
16
|
+
* software without specific prior written permission.
|
17
|
+
*
|
18
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
22
|
+
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
23
|
+
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
24
|
+
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
25
|
+
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
26
|
+
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
27
|
+
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
28
|
+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
*/
|
30
|
+
|
31
|
+
/* $Id$ */
|
32
|
+
|
33
|
+
|
34
|
+
#ifdef HAVE_CONFIG_H
|
35
|
+
#include <config.h>
|
36
|
+
#endif/*HAVE_CONFIG_H*/
|
37
|
+
|
38
|
+
#include <os.h>
|
39
|
+
|
40
|
+
#include <stdio.h>
|
41
|
+
#include <stdlib.h>
|
42
|
+
#include <string.h>
|
43
|
+
|
44
|
+
#include <crfsuite.h>
|
45
|
+
|
46
|
+
#include "logging.h"
|
47
|
+
#include "crf1d.h"
|
48
|
+
#include "rumavl.h" /* AVL tree library necessary for feature generation. */
|
49
|
+
|
50
|
+
/**
|
51
|
+
* Feature set.
|
52
|
+
*/
|
53
|
+
typedef struct {
|
54
|
+
RUMAVL* avl; /**< Root node of the AVL tree. */
|
55
|
+
int num; /**< Number of features in the AVL tree. */
|
56
|
+
} featureset_t;
|
57
|
+
|
58
|
+
|
59
|
+
/* Three-way comparison: evaluates to -1, 0, or 1 when a is less than, equal
   to, or greater than b. The whole expansion is parenthesized so that the
   macro can be used inside a larger expression (e.g. COMP(x, y) == -1).
   Each argument is evaluated twice; do not pass expressions with side
   effects. */
#define COMP(a, b)  (((a) > (b)) - ((a) < (b)))
|
60
|
+
|
61
|
+
static int featureset_comp(const void *x, const void *y, size_t n, void *udata)
|
62
|
+
{
|
63
|
+
int ret = 0;
|
64
|
+
const crf1df_feature_t* f1 = (const crf1df_feature_t*)x;
|
65
|
+
const crf1df_feature_t* f2 = (const crf1df_feature_t*)y;
|
66
|
+
|
67
|
+
ret = COMP(f1->type, f2->type);
|
68
|
+
if (ret == 0) {
|
69
|
+
ret = COMP(f1->src, f2->src);
|
70
|
+
if (ret == 0) {
|
71
|
+
ret = COMP(f1->dst, f2->dst);
|
72
|
+
}
|
73
|
+
}
|
74
|
+
return ret;
|
75
|
+
}
|
76
|
+
|
77
|
+
static featureset_t* featureset_new()
|
78
|
+
{
|
79
|
+
featureset_t* set = NULL;
|
80
|
+
set = (featureset_t*)calloc(1, sizeof(featureset_t));
|
81
|
+
if (set != NULL) {
|
82
|
+
set->num = 0;
|
83
|
+
set->avl = rumavl_new(
|
84
|
+
sizeof(crf1df_feature_t), featureset_comp, NULL, NULL);
|
85
|
+
if (set->avl == NULL) {
|
86
|
+
free(set);
|
87
|
+
set = NULL;
|
88
|
+
}
|
89
|
+
}
|
90
|
+
return set;
|
91
|
+
}
|
92
|
+
|
93
|
+
/**
 * Releases a feature set: destroys its AVL tree, then frees the set itself.
 * Passing NULL does nothing.
 *
 *  @param  set     The feature set to release (may be NULL).
 */
static void featureset_delete(featureset_t* set)
{
    if (set == NULL) {
        return;
    }

    rumavl_destroy(set->avl);
    free(set);
}
|
100
|
+
|
101
|
+
/**
 * Adds a feature to the set, merging it with an equal feature if one is
 * already stored.
 *
 * When the tree already holds a matching feature, only its frequency is
 * increased by f->freq. Otherwise the feature is inserted and the feature
 * count is incremented, but only if the insertion succeeded, so that
 * set->num never counts a feature that is not in the tree.
 *
 *  @param  set     The feature set.
 *  @param  f       The feature to add.
 *  @return         0 on success; otherwise the non-zero status reported by
 *                  rumavl_insert().
 */
static int featureset_add(featureset_t* set, const crf1df_feature_t* f)
{
    int status = 0;

    /* Check whether the feature already exists. */
    crf1df_feature_t *found = (crf1df_feature_t*)rumavl_find(set->avl, f);
    if (found != NULL) {
        /* An existing feature: add the observation expectation. */
        found->freq += f->freq;
        return 0;
    }

    /* A new feature: insert it into the feature set. */
    status = rumavl_insert(set->avl, f);
    if (status != 0) {
        /* Insertion failed (e.g. out of memory); leave the count untouched. */
        return status;
    }
    ++set->num;
    return 0;
}
|
115
|
+
|
116
|
+
static crf1df_feature_t*
|
117
|
+
featureset_generate(
|
118
|
+
int *ptr_num_features,
|
119
|
+
featureset_t* set,
|
120
|
+
floatval_t minfreq
|
121
|
+
)
|
122
|
+
{
|
123
|
+
int n = 0, k = 0;
|
124
|
+
RUMAVL_NODE *node = NULL;
|
125
|
+
crf1df_feature_t *f = NULL;
|
126
|
+
crf1df_feature_t *features = NULL;
|
127
|
+
|
128
|
+
/* The first pass: count the number of valid features. */
|
129
|
+
while ((node = rumavl_node_next(set->avl, node, 1, (void**)&f)) != NULL) {
|
130
|
+
if (minfreq <= f->freq) {
|
131
|
+
++n;
|
132
|
+
}
|
133
|
+
}
|
134
|
+
|
135
|
+
/* The second path: copy the valid features to the feature array. */
|
136
|
+
features = (crf1df_feature_t*)calloc(n, sizeof(crf1df_feature_t));
|
137
|
+
if (features != NULL) {
|
138
|
+
node = NULL;
|
139
|
+
while ((node = rumavl_node_next(set->avl, node, 1, (void**)&f)) != NULL) {
|
140
|
+
if (minfreq <= f->freq) {
|
141
|
+
memcpy(&features[k], f, sizeof(crf1df_feature_t));
|
142
|
+
++k;
|
143
|
+
}
|
144
|
+
}
|
145
|
+
*ptr_num_features = n;
|
146
|
+
return features;
|
147
|
+
} else {
|
148
|
+
*ptr_num_features = 0;
|
149
|
+
return NULL;
|
150
|
+
}
|
151
|
+
}
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
crf1df_feature_t* crf1df_generate(
|
156
|
+
int *ptr_num_features,
|
157
|
+
dataset_t *ds,
|
158
|
+
int num_labels,
|
159
|
+
int num_attributes,
|
160
|
+
int connect_all_attrs,
|
161
|
+
int connect_all_edges,
|
162
|
+
floatval_t minfreq,
|
163
|
+
crfsuite_logging_callback func,
|
164
|
+
void *instance
|
165
|
+
)
|
166
|
+
{
|
167
|
+
int c, i, j, s, t;
|
168
|
+
crf1df_feature_t f;
|
169
|
+
crf1df_feature_t *features = NULL;
|
170
|
+
featureset_t* set = NULL;
|
171
|
+
const int N = ds->num_instances;
|
172
|
+
const int L = num_labels;
|
173
|
+
logging_t lg;
|
174
|
+
|
175
|
+
lg.func = func;
|
176
|
+
lg.instance = instance;
|
177
|
+
lg.percent = 0;
|
178
|
+
|
179
|
+
/* Create an instance of feature set. */
|
180
|
+
set = featureset_new();
|
181
|
+
|
182
|
+
/* Loop over the sequences in the training data. */
|
183
|
+
logging_progress_start(&lg);
|
184
|
+
|
185
|
+
for (s = 0;s < N;++s) {
|
186
|
+
int prev = L, cur = 0;
|
187
|
+
const crfsuite_item_t* item = NULL;
|
188
|
+
const crfsuite_instance_t* seq = dataset_get(ds, s);
|
189
|
+
const int T = seq->num_items;
|
190
|
+
|
191
|
+
/* Loop over the items in the sequence. */
|
192
|
+
for (t = 0;t < T;++t) {
|
193
|
+
item = &seq->items[t];
|
194
|
+
cur = seq->labels[t];
|
195
|
+
|
196
|
+
/* Transition feature: label #prev -> label #(item->yid).
|
197
|
+
Features with previous label #L are transition BOS. */
|
198
|
+
if (prev != L) {
|
199
|
+
f.type = FT_TRANS;
|
200
|
+
f.src = prev;
|
201
|
+
f.dst = cur;
|
202
|
+
f.freq = 1;
|
203
|
+
featureset_add(set, &f);
|
204
|
+
}
|
205
|
+
|
206
|
+
for (c = 0;c < item->num_contents;++c) {
|
207
|
+
/* State feature: attribute #a -> state #(item->yid). */
|
208
|
+
f.type = FT_STATE;
|
209
|
+
f.src = item->contents[c].aid;
|
210
|
+
f.dst = cur;
|
211
|
+
f.freq = item->contents[c].value;
|
212
|
+
featureset_add(set, &f);
|
213
|
+
|
214
|
+
/* Generate state features connecting attributes with all
|
215
|
+
output labels. These features are not unobserved in the
|
216
|
+
training data (zero expexcations). */
|
217
|
+
if (connect_all_attrs) {
|
218
|
+
for (i = 0;i < L;++i) {
|
219
|
+
f.type = FT_STATE;
|
220
|
+
f.src = item->contents[c].aid;
|
221
|
+
f.dst = i;
|
222
|
+
f.freq = 0;
|
223
|
+
featureset_add(set, &f);
|
224
|
+
}
|
225
|
+
}
|
226
|
+
}
|
227
|
+
|
228
|
+
prev = cur;
|
229
|
+
}
|
230
|
+
|
231
|
+
logging_progress(&lg, s * 100 / N);
|
232
|
+
}
|
233
|
+
logging_progress_end(&lg);
|
234
|
+
|
235
|
+
/* Generate edge features representing all pairs of labels.
|
236
|
+
These features are not unobserved in the training data
|
237
|
+
(zero expexcations). */
|
238
|
+
if (connect_all_edges) {
|
239
|
+
for (i = 0;i < L;++i) {
|
240
|
+
for (j = 0;j < L;++j) {
|
241
|
+
f.type = FT_TRANS;
|
242
|
+
f.src = i;
|
243
|
+
f.dst = j;
|
244
|
+
f.freq = 0;
|
245
|
+
featureset_add(set, &f);
|
246
|
+
}
|
247
|
+
}
|
248
|
+
}
|
249
|
+
|
250
|
+
/* Convert the feature set to an feature array. */
|
251
|
+
features = featureset_generate(ptr_num_features, set, minfreq);
|
252
|
+
|
253
|
+
/* Delete the feature set. */
|
254
|
+
featureset_delete(set);
|
255
|
+
|
256
|
+
return features;
|
257
|
+
}
|
258
|
+
|
259
|
+
int crf1df_init_references(
|
260
|
+
feature_refs_t **ptr_attributes,
|
261
|
+
feature_refs_t **ptr_trans,
|
262
|
+
const crf1df_feature_t *features,
|
263
|
+
const int K,
|
264
|
+
const int A,
|
265
|
+
const int L
|
266
|
+
)
|
267
|
+
{
|
268
|
+
int i, k;
|
269
|
+
feature_refs_t *fl = NULL;
|
270
|
+
feature_refs_t *attributes = NULL;
|
271
|
+
feature_refs_t *trans = NULL;
|
272
|
+
|
273
|
+
/*
|
274
|
+
The purpose of this routine is to collect references (indices) of:
|
275
|
+
- state features fired by each attribute (attributes)
|
276
|
+
- transition features pointing from each label (trans)
|
277
|
+
*/
|
278
|
+
|
279
|
+
/* Allocate arrays for feature references. */
|
280
|
+
attributes = (feature_refs_t*)calloc(A, sizeof(feature_refs_t));
|
281
|
+
if (attributes == NULL) goto error_exit;
|
282
|
+
trans = (feature_refs_t*)calloc(L, sizeof(feature_refs_t));
|
283
|
+
if (trans == NULL) goto error_exit;
|
284
|
+
|
285
|
+
/*
|
286
|
+
Firstly, loop over the features to count the number of references.
|
287
|
+
We don't use realloc() to avoid memory fragmentation.
|
288
|
+
*/
|
289
|
+
for (k = 0;k < K;++k) {
|
290
|
+
const crf1df_feature_t *f = &features[k];
|
291
|
+
switch (f->type) {
|
292
|
+
case FT_STATE:
|
293
|
+
attributes[f->src].num_features++;
|
294
|
+
break;
|
295
|
+
case FT_TRANS:
|
296
|
+
trans[f->src].num_features++;
|
297
|
+
break;
|
298
|
+
}
|
299
|
+
}
|
300
|
+
|
301
|
+
/*
|
302
|
+
Secondarily, allocate memory blocks to store the feature references.
|
303
|
+
We also clear fl->num_features fields, which will be used as indices
|
304
|
+
in the next phase.
|
305
|
+
*/
|
306
|
+
for (i = 0;i < A;++i) {
|
307
|
+
fl = &attributes[i];
|
308
|
+
fl->fids = (int*)calloc(fl->num_features, sizeof(int));
|
309
|
+
if (fl->fids == NULL) goto error_exit;
|
310
|
+
fl->num_features = 0;
|
311
|
+
}
|
312
|
+
for (i = 0;i < L;++i) {
|
313
|
+
fl = &trans[i];
|
314
|
+
fl->fids = (int*)calloc(fl->num_features, sizeof(int));
|
315
|
+
if (fl->fids == NULL) goto error_exit;
|
316
|
+
fl->num_features = 0;
|
317
|
+
}
|
318
|
+
|
319
|
+
/*
|
320
|
+
Finally, store the feature indices.
|
321
|
+
*/
|
322
|
+
for (k = 0;k < K;++k) {
|
323
|
+
const crf1df_feature_t *f = &features[k];
|
324
|
+
switch (f->type) {
|
325
|
+
case FT_STATE:
|
326
|
+
fl = &attributes[f->src];
|
327
|
+
fl->fids[fl->num_features++] = k;
|
328
|
+
break;
|
329
|
+
case FT_TRANS:
|
330
|
+
fl = &trans[f->src];
|
331
|
+
fl->fids[fl->num_features++] = k;
|
332
|
+
break;
|
333
|
+
}
|
334
|
+
}
|
335
|
+
|
336
|
+
*ptr_attributes = attributes;
|
337
|
+
*ptr_trans = trans;
|
338
|
+
return 0;
|
339
|
+
|
340
|
+
error_exit:
|
341
|
+
if (attributes != NULL) {
|
342
|
+
for (i = 0;i < A;++i) free(attributes[i].fids);
|
343
|
+
free(attributes);
|
344
|
+
}
|
345
|
+
if (trans != NULL) {
|
346
|
+
for (i = 0;i < L;++i) free(trans[i].fids);
|
347
|
+
free(trans);
|
348
|
+
}
|
349
|
+
*ptr_attributes = NULL;
|
350
|
+
*ptr_trans = NULL;
|
351
|
+
return -1;
|
352
|
+
}
|
@@ -0,0 +1,994 @@
|
|
1
|
+
/*
|
2
|
+
* CRF1d model.
|
3
|
+
*
|
4
|
+
* Copyright (c) 2007-2010, Naoaki Okazaki
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
* * Neither the names of the authors nor the names of its contributors
|
15
|
+
* may be used to endorse or promote products derived from this
|
16
|
+
* software without specific prior written permission.
|
17
|
+
*
|
18
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
22
|
+
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
23
|
+
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
24
|
+
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
25
|
+
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
26
|
+
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
27
|
+
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
28
|
+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
*/
|
30
|
+
|
31
|
+
/* $Id$ */
|
32
|
+
|
33
|
+
#include "os.h"
|
34
|
+
|
35
|
+
#include <stdio.h>
|
36
|
+
#include <stdlib.h>
|
37
|
+
#include <stdint.h>
|
38
|
+
#include <string.h>
|
39
|
+
#include <cqdb.h>
|
40
|
+
|
41
|
+
#include <crfsuite.h>
|
42
|
+
#include "crf1d.h"
|
43
|
+
|
44
|
+
/* Four-character identifiers. */
#define FILEMAGIC       "lCRF"      /* File magic. */
#define MODELTYPE       "FOMC"      /* Model type. */
#define CHUNK_LABELREF  "LFRF"      /* Chunk id: label feature references. */
#define CHUNK_ATTRREF   "AFRF"      /* Chunk id: attribute feature references. */
#define CHUNK_FEATURE   "FEAT"      /* Chunk id: features. */

/* Version number. */
#define VERSION_NUMBER  (100)

/* Sizes in bytes. NOTE(review): HEADER_SIZE and CHUNK_SIZE match the summed
   field widths of header_t and feature_header_t; FEATURE_SIZE is presumably
   the size of one stored feature -- confirm against the writer code. */
#define HEADER_SIZE     48
#define CHUNK_SIZE      12
#define FEATURE_SIZE    20
|
53
|
+
|
54
|
+
/* WSTATE_* constants, numbered consecutively from 0.
   NOTE(review): presumably the values held by tag_crf1dmw::state -- confirm. */
enum {
    WSTATE_NONE      = 0,
    WSTATE_LABELS    = 1,
    WSTATE_ATTRS     = 2,
    WSTATE_LABELREFS = 3,
    WSTATE_ATTRREFS  = 4,
    WSTATE_FEATURES  = 5,
};
|
62
|
+
|
63
|
+
/* Model file header: twelve 4-byte fields. */
typedef struct {
    /* Identification. */
    uint8_t  magic[4];          /* File magic. */
    uint32_t size;              /* File size. */
    uint8_t  type[4];           /* Model type. */
    uint32_t version;           /* Version number. */

    /* Element counts. */
    uint32_t num_features;      /* Number of features. */
    uint32_t num_labels;        /* Number of labels. */
    uint32_t num_attrs;         /* Number of attributes. */

    /* Offsets to the individual sections. */
    uint32_t off_features;      /* Offset to features. */
    uint32_t off_labels;        /* Offset to label CQDB. */
    uint32_t off_attrs;         /* Offset to attribute CQDB. */
    uint32_t off_labelrefs;     /* Offset to label feature references. */
    uint32_t off_attrrefs;      /* Offset to attribute feature references. */
} header_t;
|
77
|
+
|
78
|
+
/* Header of a feature-reference chunk, followed by its offset table.
   The offset table is declared with one element; the declaration is kept
   as is so that sizeof(featureref_header_t) does not change. */
typedef struct {
    uint8_t  chunk[4];          /* Chunk id. */
    uint32_t size;              /* Chunk size. */
    uint32_t num;               /* Number of items. */
    uint32_t offsets[1];        /* Offsets. */
} featureref_header_t;
|
84
|
+
|
85
|
+
/* Header of the feature chunk. */
typedef struct {
    uint8_t  chunk[4];          /* Chunk id. */
    uint32_t size;              /* Chunk size. */
    uint32_t num;               /* Number of items. */
} feature_header_t;
|
90
|
+
|
91
|
+
struct tag_crf1dm {
|
92
|
+
uint8_t* buffer_orig;
|
93
|
+
uint8_t* buffer;
|
94
|
+
uint32_t size;
|
95
|
+
header_t* header;
|
96
|
+
cqdb_t* labels;
|
97
|
+
cqdb_t* attrs;
|
98
|
+
};
|
99
|
+
|
100
|
+
struct tag_crf1dmw {
|
101
|
+
FILE *fp;
|
102
|
+
int state;
|
103
|
+
header_t header;
|
104
|
+
cqdb_writer_t* dbw;
|
105
|
+
featureref_header_t* href;
|
106
|
+
feature_header_t* hfeat;
|
107
|
+
};
|
108
|
+
|
109
|
+
|
110
|
+
/*
 * Key-type constants, numbered consecutively from 'A'.
 * NOTE(review): not referenced anywhere in the visible part of this file.
 */
enum {
    KT_GLOBAL = 'A',
    KT_NUMATTRS,    /* 'B' */
    KT_NUMLABELS,   /* 'C' */
    KT_STR2LID,     /* 'D' */
    KT_LID2STR,     /* 'E' */
    KT_STR2AID,     /* 'F' */
    KT_FEATURE,     /* 'G' */
};
|
119
|
+
|
120
|
+
/*
 * Writes one byte to the stream.
 * Returns 0 on success, 1 if the byte could not be written.
 */
static int write_uint8(FILE *fp, uint8_t value)
{
    if (fwrite(&value, sizeof(value), 1, fp) != 1) {
        return 1;
    }
    return 0;
}
|
124
|
+
|
125
|
+
/*
 * Reads one byte from the buffer into *value.
 * Returns the number of bytes consumed (always 1).
 */
static int read_uint8(uint8_t* buffer, uint8_t* value)
{
    value[0] = buffer[0];
    return (int)sizeof(uint8_t);
}
|
130
|
+
|
131
|
+
/*
 * Writes a 32-bit value to the stream in little-endian byte order.
 * Returns 0 on success, 1 if fewer than four bytes were written.
 */
static int write_uint32(FILE *fp, uint32_t value)
{
    uint8_t bytes[4];
    int k;

    /* Least significant byte first. */
    for (k = 0; k < 4; ++k) {
        bytes[k] = (uint8_t)((value >> (8 * k)) & 0xFF);
    }

    if (fwrite(bytes, sizeof(uint8_t), 4, fp) != 4) {
        return 1;
    }
    return 0;
}
|
140
|
+
|
141
|
+
/*
 * Decodes a little-endian 32-bit value from the first four bytes of buffer.
 * Returns the number of bytes consumed (always 4).
 */
static int read_uint32(uint8_t* buffer, uint32_t* value)
{
    uint32_t decoded = 0;
    int k;

    /* Fold from the most significant byte (index 3) down to index 0. */
    for (k = 3; k >= 0; --k) {
        decoded = (decoded << 8) | (uint32_t)buffer[k];
    }

    *value = decoded;
    return (int)sizeof(uint32_t);
}
|
149
|
+
|
150
|
+
/*
 * Writes n bytes from array to the stream, one byte at a time.
 * Every byte is attempted even after a failure; the result is 0 when all
 * writes succeeded and 1 when at least one failed.
 */
static int write_uint8_array(FILE *fp, uint8_t *array, size_t n)
{
    int failed = 0;
    const uint8_t *cur = array;
    const uint8_t *end = array + n;

    while (cur != end) {
        uint8_t byte = *cur++;
        if (fwrite(&byte, sizeof(byte), 1, fp) != 1) {
            failed |= 1;
        }
    }
    return failed;
}
|
159
|
+
|
160
|
+
/*
 * Copies n bytes from buffer into array, front to back.
 * Returns the number of bytes consumed (n).
 */
static int read_uint8_array(uint8_t* buffer, uint8_t *array, size_t n)
{
    int consumed = 0;
    size_t k = 0;

    while (k < n) {
        array[k] = buffer[k];
        consumed += (int)sizeof(uint8_t);
        ++k;
    }
    return consumed;
}
|
171
|
+
|
172
|
+
/*
 * Writes a floating-point value as eight bytes, least significant byte of
 * its 64-bit memory image first. The result of fwrite() is not inspected.
 *
 * Assumptions carried by this encoding:
 *  - sizeof(floatval_t) = sizeof(double) = sizeof(uint64_t)
 *  - the byte order of floatval_t and uint64_t is the same
 *  - ARM's mixed-endian is not supported
 */
static void write_float(FILE *fp, floatval_t value)
{
    uint64_t bits;
    uint8_t bytes[8];
    int k;

    /* Reinterpret the memory image of the value as a 64-bit integer. */
    memcpy(&bits, &value, sizeof(bits));

    for (k = 0; k < 8; ++k) {
        bytes[k] = (uint8_t)(bits & 0xFF);
        bits >>= 8;
    }
    fwrite(bytes, sizeof(uint8_t), 8, fp);
}
|
196
|
+
|
197
|
+
/*
 * Rebuilds a floating-point value from eight little-endian bytes (the
 * inverse of write_float()) and stores it in *value.
 * Returns sizeof(*value), the number of bytes the caller should skip.
 */
static int read_float(uint8_t* buffer, floatval_t* value)
{
    uint64_t bits = 0;
    int k;

    /* Fold from the most significant byte (index 7) down to index 0. */
    for (k = 7; k >= 0; --k) {
        bits = (bits << 8) | (uint64_t)buffer[k];
    }

    memcpy(value, &bits, sizeof(*value));
    return sizeof(*value);
}
|
211
|
+
|
212
|
+
crf1dmw_t* crf1mmw(const char *filename)
|
213
|
+
{
|
214
|
+
header_t *header = NULL;
|
215
|
+
crf1dmw_t *writer = NULL;
|
216
|
+
|
217
|
+
/* Create a writer instance. */
|
218
|
+
writer = (crf1dmw_t*)calloc(1, sizeof(crf1dmw_t));
|
219
|
+
if (writer == NULL) {
|
220
|
+
goto error_exit;
|
221
|
+
}
|
222
|
+
|
223
|
+
/* Open the file for writing. */
|
224
|
+
writer->fp = fopen(filename, "wb");
|
225
|
+
if (writer->fp == NULL) {
|
226
|
+
goto error_exit;
|
227
|
+
}
|
228
|
+
|
229
|
+
/* Fill the members in the header. */
|
230
|
+
header = &writer->header;
|
231
|
+
strncpy(header->magic, FILEMAGIC, 4);
|
232
|
+
strncpy(header->type, MODELTYPE, 4);
|
233
|
+
header->version = VERSION_NUMBER;
|
234
|
+
|
235
|
+
/* Advance the file position to skip the file header. */
|
236
|
+
if (fseek(writer->fp, HEADER_SIZE, SEEK_CUR) != 0) {
|
237
|
+
goto error_exit;
|
238
|
+
}
|
239
|
+
|
240
|
+
return writer;
|
241
|
+
|
242
|
+
error_exit:
|
243
|
+
if (writer != NULL) {
|
244
|
+
if (writer->fp != NULL) {
|
245
|
+
fclose(writer->fp);
|
246
|
+
}
|
247
|
+
free(writer);
|
248
|
+
}
|
249
|
+
return NULL;
|
250
|
+
}
|
251
|
+
|
252
|
+
/*
 * Finalizes the model file and destroys the writer.
 *
 * Records the current file position as the file size, seeks back to the
 * start, writes the header fields in file order, then closes the stream and
 * frees the writer. The writer is released on every path.
 *
 * Returns 0 on success and 1 on failure. A failure is reported when the
 * position cannot be determined, the seek fails, any header write fails,
 * the stream has its error indicator set, or fclose() fails (which can mean
 * buffered data was never flushed).
 */
int crf1dmw_close(crf1dmw_t* writer)
{
    FILE *fp = NULL;
    header_t *header = NULL;
    long pos = 0;
    int failed = 0;

    if (writer == NULL) {
        return 1;
    }
    fp = writer->fp;
    header = &writer->header;

    /* Store the file size and move the file position to the head. */
    pos = ftell(fp);
    if (pos < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        failed = 1;
    } else {
        header->size = (uint32_t)pos;

        /* Write the file header. */
        failed |= write_uint8_array(fp, header->magic, sizeof(header->magic));
        failed |= write_uint32(fp, header->size);
        failed |= write_uint8_array(fp, header->type, sizeof(header->type));
        failed |= write_uint32(fp, header->version);
        failed |= write_uint32(fp, header->num_features);
        failed |= write_uint32(fp, header->num_labels);
        failed |= write_uint32(fp, header->num_attrs);
        failed |= write_uint32(fp, header->off_features);
        failed |= write_uint32(fp, header->off_labels);
        failed |= write_uint32(fp, header->off_attrs);
        failed |= write_uint32(fp, header->off_labelrefs);
        failed |= write_uint32(fp, header->off_attrrefs);

        /* Catch errors left on the stream by earlier unchecked writes. */
        if (ferror(fp)) {
            failed = 1;
        }
    }

    /* Close the writer; a failed fclose() on a write stream is an error. */
    if (fclose(fp) != 0) {
        failed = 1;
    }
    free(writer);
    return failed ? 1 : 0;
}
|
298
|
+
|
299
|
+
/*
 * Begins the label section: records the current file position as the label
 * CQDB offset and opens a CQDB writer on the stream.
 * Returns 0 on success; 1 if another section is open or the CQDB writer
 * could not be created (the recorded offset is reset to 0 in that case).
 */
int crf1dmw_open_labels(crf1dmw_t* writer, int num_labels)
{
    header_t *hdr = &writer->header;

    /* Only one section may be open at a time. */
    if (writer->state != WSTATE_NONE) {
        return 1;
    }

    /* Remember where the CQDB chunk starts, then open it. */
    hdr->off_labels = (uint32_t)ftell(writer->fp);
    writer->dbw = cqdb_writer(writer->fp, 0);
    if (writer->dbw == NULL) {
        hdr->off_labels = 0;
        return 1;
    }

    hdr->num_labels = num_labels;
    writer->state = WSTATE_LABELS;
    return 0;
}
|
320
|
+
|
321
|
+
/*
 * Ends the label section by closing its CQDB writer.
 * Returns 0 on success; 1 if the label section is not open or
 * cqdb_writer_close() reports a nonzero result (the writer state is left
 * untouched in that case).
 */
int crf1dmw_close_labels(crf1dmw_t* writer)
{
    if (writer->state == WSTATE_LABELS) {
        /* Close the CQDB chunk. */
        if (cqdb_writer_close(writer->dbw) == 0) {
            writer->dbw = NULL;
            writer->state = WSTATE_NONE;
            return 0;
        }
    }
    return 1;
}
|
337
|
+
|
338
|
+
/*
 * Stores the (value, lid) pair in the open label CQDB.
 * Returns 0 on success; 1 if the label section is not open or
 * cqdb_writer_put() reports a nonzero result.
 */
int crf1dmw_put_label(crf1dmw_t* writer, int lid, const char *value)
{
    /* Labels can only be added while the label section is open. */
    if (writer->state != WSTATE_LABELS) {
        return 1;
    }
    return cqdb_writer_put(writer->dbw, value, lid) ? 1 : 0;
}
|
352
|
+
|
353
|
+
/*
 * Begins the attribute section: records the current file position as the
 * attribute CQDB offset and opens a CQDB writer on the stream.
 * Returns 0 on success; 1 if another section is open or the CQDB writer
 * could not be created (the recorded offset is reset to 0 in that case).
 */
int crf1dmw_open_attrs(crf1dmw_t* writer, int num_attrs)
{
    header_t *hdr = &writer->header;

    /* Only one section may be open at a time. */
    if (writer->state != WSTATE_NONE) {
        return 1;
    }

    /* Remember where the CQDB chunk starts, then open it. */
    hdr->off_attrs = (uint32_t)ftell(writer->fp);
    writer->dbw = cqdb_writer(writer->fp, 0);
    if (writer->dbw == NULL) {
        hdr->off_attrs = 0;
        return 1;
    }

    hdr->num_attrs = num_attrs;
    writer->state = WSTATE_ATTRS;
    return 0;
}
|
374
|
+
|
375
|
+
/*
 * Ends the attribute section by closing its CQDB writer.
 * Returns 0 on success; 1 if the attribute section is not open or
 * cqdb_writer_close() reports a nonzero result (the writer state is left
 * untouched in that case).
 */
int crf1dmw_close_attrs(crf1dmw_t* writer)
{
    if (writer->state == WSTATE_ATTRS) {
        /* Close the CQDB chunk. */
        if (cqdb_writer_close(writer->dbw) == 0) {
            writer->dbw = NULL;
            writer->state = WSTATE_NONE;
            return 0;
        }
    }
    return 1;
}
|
391
|
+
|
392
|
+
/*
 * Stores the (value, aid) pair in the open attribute CQDB.
 * Returns 0 on success; 1 if the attribute section is not open or
 * cqdb_writer_put() reports a nonzero result.
 */
int crf1dmw_put_attr(crf1dmw_t* writer, int aid, const char *value)
{
    /* Attributes can only be added while the attribute section is open. */
    if (writer->state != WSTATE_ATTRS) {
        return 1;
    }
    return cqdb_writer_put(writer->dbw, value, aid) ? 1 : 0;
}
|
406
|
+
|
407
|
+
/*
 * Begins the label feature-reference chunk.
 *
 * Allocates an in-memory chunk header with room for num_labels offsets,
 * pads the file with zero bytes up to a 4-byte boundary, records that
 * position as the chunk offset, and skips over the space the chunk header
 * and offset table will occupy (they are written by
 * crf1dmw_close_labelrefs()).
 *
 * Returns 0 on success, CRFSUITEERR_INTERNAL_LOGIC if another section is
 * open, or CRFSUITEERR_OUTOFMEMORY if the allocation fails. Stream errors
 * are not checked here.
 */
int crf1dmw_open_labelrefs(crf1dmw_t* writer, int num_labels)
{
    uint32_t offset;
    FILE *fp = writer->fp;
    featureref_header_t* href = NULL;
    size_t size = CHUNK_SIZE + sizeof(uint32_t) * num_labels;

    /* Check if we aren't writing anything at this moment. */
    if (writer->state != WSTATE_NONE) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    /* Allocate a feature reference array. */
    href = (featureref_header_t*)calloc(size, 1);
    if (href == NULL) {
        return CRFSUITEERR_OUTOFMEMORY;
    }

    /* Align the offset to a DWORD boundary. */
    offset = (uint32_t)ftell(fp);
    while (offset % 4 != 0) {
        uint8_t c = 0;
        fwrite(&c, sizeof(uint8_t), 1, fp);
        ++offset;
    }

    /* Store the current offset position to the file header. */
    writer->header.off_labelrefs = offset;
    fseek(fp, (long)size, SEEK_CUR);

    /*
     * Fill members in the feature reference header. The chunk id is a
     * fixed-size byte array without a terminator, so copy it as raw bytes.
     */
    memcpy(href->chunk, CHUNK_LABELREF, sizeof(href->chunk));
    href->size = 0;
    href->num = num_labels;

    writer->href = href;
    writer->state = WSTATE_LABELREFS;
    return 0;
}
|
446
|
+
|
447
|
+
/*
 * Ends the label feature-reference chunk.
 *
 * Computes the chunk size from the current file position, seeks back to the
 * chunk start to write the chunk id, size, item count and offset table,
 * then restores the file position and frees the in-memory chunk header.
 *
 * Returns 0 on success or CRFSUITEERR_INTERNAL_LOGIC if the label
 * feature-reference chunk is not open. Stream errors are not checked here.
 */
int crf1dmw_close_labelrefs(crf1dmw_t* writer)
{
    FILE *stream = writer->fp;
    featureref_header_t* refs = writer->href;
    uint32_t start = writer->header.off_labelrefs;
    uint32_t tail = 0;
    uint32_t k = 0;

    /* Make sure that we are writing label feature references. */
    if (writer->state != WSTATE_LABELREFS) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    /* The chunk spans from its recorded start to the current position. */
    tail = (uint32_t)ftell(stream);
    refs->size = tail - start;

    /* Go back and fill in the chunk header and the offset table. */
    fseek(stream, start, SEEK_SET);
    write_uint8_array(stream, refs->chunk, 4);
    write_uint32(stream, refs->size);
    write_uint32(stream, refs->num);
    while (k < refs->num) {
        write_uint32(stream, refs->offsets[k]);
        ++k;
    }

    /* Resume appending at the end of the chunk. */
    fseek(stream, tail, SEEK_SET);

    free(refs);
    writer->href = NULL;
    writer->state = WSTATE_NONE;
    return 0;
}
|
483
|
+
|
484
|
+
/*
 * Writes the feature references of label lid.
 *
 * Records the current file position in the chunk's offset table, then
 * writes the number of active features followed by their mapped ids. A
 * feature is active when its entry in map is non-negative; map is indexed
 * by the ids in ref->fids and yields the id to store.
 *
 * Returns 0 on success, or CRFSUITEERR_INTERNAL_LOGIC if the label
 * feature-reference chunk is not open or lid is outside the range given to
 * crf1dmw_open_labelrefs(). Stream errors are not checked here.
 */
int crf1dmw_put_labelref(crf1dmw_t* writer, int lid, const feature_refs_t* ref, int *map)
{
    int i, fid;
    uint32_t n = 0;
    FILE *fp = writer->fp;
    featureref_header_t* href = writer->href;

    /* Make sure that we are writing label feature references. */
    if (writer->state != WSTATE_LABELREFS) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    /* The offset table only has href->num slots; refuse to write past it. */
    if (lid < 0 || href->num <= (uint32_t)lid) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    /* Store the current offset to the offset array. */
    href->offsets[lid] = (uint32_t)ftell(fp);

    /* Count the number of references to active features. */
    for (i = 0;i < ref->num_features;++i) {
        if (0 <= map[ref->fids[i]]) ++n;
    }

    /* Write the feature reference. */
    write_uint32(fp, n);
    for (i = 0;i < ref->num_features;++i) {
        fid = map[ref->fids[i]];
        if (0 <= fid) write_uint32(fp, (uint32_t)fid);
    }

    return 0;
}
|
513
|
+
|
514
|
+
/*
 * Begins the attribute feature-reference chunk.
 *
 * Allocates an in-memory chunk header with room for num_attrs offsets,
 * pads the file with zero bytes up to a 4-byte boundary, records that
 * position as the chunk offset, and skips over the space the chunk header
 * and offset table will occupy (they are written by
 * crf1dmw_close_attrrefs()).
 *
 * Returns 0 on success, CRFSUITEERR_INTERNAL_LOGIC if another section is
 * open, or CRFSUITEERR_OUTOFMEMORY if the allocation fails. Stream errors
 * are not checked here.
 */
int crf1dmw_open_attrrefs(crf1dmw_t* writer, int num_attrs)
{
    uint32_t offset;
    FILE *fp = writer->fp;
    featureref_header_t* href = NULL;
    size_t size = CHUNK_SIZE + sizeof(uint32_t) * num_attrs;

    /* Check if we aren't writing anything at this moment. */
    if (writer->state != WSTATE_NONE) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    /* Allocate a feature reference array. */
    href = (featureref_header_t*)calloc(size, 1);
    if (href == NULL) {
        return CRFSUITEERR_OUTOFMEMORY;
    }

    /* Align the offset to a DWORD boundary. */
    offset = (uint32_t)ftell(fp);
    while (offset % 4 != 0) {
        uint8_t c = 0;
        fwrite(&c, sizeof(uint8_t), 1, fp);
        ++offset;
    }

    /* Store the current offset position to the file header. */
    writer->header.off_attrrefs = offset;
    fseek(fp, (long)size, SEEK_CUR);

    /*
     * Fill members in the feature reference header. The chunk id is a
     * fixed-size byte array without a terminator, so copy it as raw bytes.
     */
    memcpy(href->chunk, CHUNK_ATTRREF, sizeof(href->chunk));
    href->size = 0;
    href->num = num_attrs;

    writer->href = href;
    writer->state = WSTATE_ATTRREFS;
    return 0;
}
|
553
|
+
|
554
|
+
/*
 * Ends the attribute feature-reference chunk.
 *
 * Computes the chunk size from the current file position, seeks back to the
 * chunk start to write the chunk id, size, item count and offset table,
 * then restores the file position and frees the in-memory chunk header.
 *
 * Returns 0 on success or CRFSUITEERR_INTERNAL_LOGIC if the attribute
 * feature-reference chunk is not open. Stream errors are not checked here.
 */
int crf1dmw_close_attrrefs(crf1dmw_t* writer)
{
    FILE *stream = writer->fp;
    featureref_header_t* refs = writer->href;
    uint32_t start = writer->header.off_attrrefs;
    uint32_t tail = 0;
    uint32_t k = 0;

    /* Make sure that we are writing attribute feature references. */
    if (writer->state != WSTATE_ATTRREFS) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    /* The chunk spans from its recorded start to the current position. */
    tail = (uint32_t)ftell(stream);
    refs->size = tail - start;

    /* Go back and fill in the chunk header and the offset table. */
    fseek(stream, start, SEEK_SET);
    write_uint8_array(stream, refs->chunk, 4);
    write_uint32(stream, refs->size);
    write_uint32(stream, refs->num);
    while (k < refs->num) {
        write_uint32(stream, refs->offsets[k]);
        ++k;
    }

    /* Resume appending at the end of the chunk. */
    fseek(stream, tail, SEEK_SET);

    free(refs);
    writer->href = NULL;
    writer->state = WSTATE_NONE;
    return 0;
}
|
590
|
+
|
591
|
+
/*
 * Writes the feature references of attribute aid.
 *
 * Records the current file position in the chunk's offset table, then
 * writes the number of active features followed by their mapped ids. A
 * feature is active when its entry in map is non-negative; map is indexed
 * by the ids in ref->fids and yields the id to store.
 *
 * Returns 0 on success, or CRFSUITEERR_INTERNAL_LOGIC if the attribute
 * feature-reference chunk is not open or aid is outside the range given to
 * crf1dmw_open_attrrefs(). Stream errors are not checked here.
 */
int crf1dmw_put_attrref(crf1dmw_t* writer, int aid, const feature_refs_t* ref, int *map)
{
    int i, fid;
    uint32_t n = 0;
    FILE *fp = writer->fp;
    featureref_header_t* href = writer->href;

    /* Make sure that we are writing attribute feature references. */
    if (writer->state != WSTATE_ATTRREFS) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    /* The offset table only has href->num slots; refuse to write past it. */
    if (aid < 0 || href->num <= (uint32_t)aid) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    /* Store the current offset to the offset array. */
    href->offsets[aid] = (uint32_t)ftell(fp);

    /* Count the number of references to active features. */
    for (i = 0;i < ref->num_features;++i) {
        if (0 <= map[ref->fids[i]]) ++n;
    }

    /* Write the feature reference. */
    write_uint32(fp, n);
    for (i = 0;i < ref->num_features;++i) {
        fid = map[ref->fids[i]];
        if (0 <= fid) write_uint32(fp, (uint32_t)fid);
    }

    return 0;
}
|
620
|
+
|
621
|
+
/*
 * Begins the feature chunk.
 *
 * Allocates a zeroed in-memory chunk header, records the current file
 * position as the feature offset, and skips CHUNK_SIZE bytes to leave room
 * for the chunk header (written by crf1dmw_close_features()).
 *
 * Returns 0 on success, CRFSUITEERR_INTERNAL_LOGIC if another section is
 * open, or CRFSUITEERR_OUTOFMEMORY if the allocation fails. Stream errors
 * are not checked here.
 */
int crf1dmw_open_features(crf1dmw_t* writer)
{
    FILE *fp = writer->fp;
    feature_header_t* hfeat = NULL;

    /* Check if we aren't writing anything at this moment. */
    if (writer->state != WSTATE_NONE) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    /* Allocate a feature chunk header. */
    hfeat = (feature_header_t*)calloc(1, sizeof(feature_header_t));
    if (hfeat == NULL) {
        return CRFSUITEERR_OUTOFMEMORY;
    }

    writer->header.off_features = (uint32_t)ftell(fp);
    fseek(fp, CHUNK_SIZE, SEEK_CUR);

    /* The chunk id is a fixed-size byte array; copy it as raw bytes. */
    memcpy(hfeat->chunk, CHUNK_FEATURE, sizeof(hfeat->chunk));
    writer->hfeat = hfeat;

    writer->state = WSTATE_FEATURES;
    return 0;
}
|
646
|
+
|
647
|
+
/*
 * Ends the feature chunk.
 *
 * Computes the chunk size from the current file position, seeks back to the
 * chunk start to write the chunk id, size and feature count, then restores
 * the file position and frees the in-memory chunk header.
 *
 * Returns 0 on success or CRFSUITEERR_INTERNAL_LOGIC if the feature chunk
 * is not open. Stream errors are not checked here.
 */
int crf1dmw_close_features(crf1dmw_t* writer)
{
    FILE *stream = writer->fp;
    feature_header_t* chunk = writer->hfeat;
    uint32_t start = writer->header.off_features;
    uint32_t tail = 0;

    /* Make sure that we are writing features. */
    if (writer->state != WSTATE_FEATURES) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    /* The chunk spans from its recorded start to the current position. */
    tail = (uint32_t)ftell(stream);
    chunk->size = tail - start;

    /* Go back and fill in the chunk header. */
    fseek(stream, start, SEEK_SET);
    write_uint8_array(stream, chunk->chunk, 4);
    write_uint32(stream, chunk->size);
    write_uint32(stream, chunk->num);

    /* Resume appending at the end of the chunk. */
    fseek(stream, tail, SEEK_SET);

    free(chunk);
    writer->hfeat = NULL;
    writer->state = WSTATE_NONE;
    return 0;
}
|
679
|
+
|
680
|
+
/*
 * Appends one feature record (type, src, dst, weight) to the feature chunk
 * and increments the chunk's feature count.
 *
 * Features must be supplied in order #0, #1, ..., #(K-1): fid has to equal
 * the number of features written so far.
 *
 * Returns 0 on success or CRFSUITEERR_INTERNAL_LOGIC if the feature chunk
 * is not open or fid is out of sequence. Stream errors are not checked here.
 */
int crf1dmw_put_feature(crf1dmw_t* writer, int fid, const crf1dm_feature_t* f)
{
    FILE *stream = writer->fp;
    feature_header_t* chunk = writer->hfeat;

    /* Reject calls outside the feature chunk and out-of-order ids alike. */
    if (writer->state != WSTATE_FEATURES || fid != chunk->num) {
        return CRFSUITEERR_INTERNAL_LOGIC;
    }

    write_uint32(stream, f->type);
    write_uint32(stream, f->src);
    write_uint32(stream, f->dst);
    write_float(stream, f->weight);

    chunk->num += 1;
    return 0;
}
|
702
|
+
|
703
|
+
crf1dm_t* crf1dm_new(const char *filename)
|
704
|
+
{
|
705
|
+
FILE *fp = NULL;
|
706
|
+
uint8_t* p = NULL;
|
707
|
+
crf1dm_t *model = NULL;
|
708
|
+
header_t *header = NULL;
|
709
|
+
|
710
|
+
model = (crf1dm_t*)calloc(1, sizeof(crf1dm_t));
|
711
|
+
if (model == NULL) {
|
712
|
+
goto error_exit;
|
713
|
+
}
|
714
|
+
|
715
|
+
fp = fopen(filename, "rb");
|
716
|
+
if (fp == NULL) {
|
717
|
+
goto error_exit;
|
718
|
+
}
|
719
|
+
|
720
|
+
fseek(fp, 0, SEEK_END);
|
721
|
+
model->size = (uint32_t)ftell(fp);
|
722
|
+
fseek(fp, 0, SEEK_SET);
|
723
|
+
|
724
|
+
model->buffer = model->buffer_orig = (uint8_t*)malloc(model->size + 16);
|
725
|
+
while ((uintptr_t)model->buffer % 16 != 0) {
|
726
|
+
++model->buffer;
|
727
|
+
}
|
728
|
+
|
729
|
+
if (fread(model->buffer, 1, model->size, fp) != model->size) {
|
730
|
+
free(model->buffer_orig);
|
731
|
+
goto error_exit;
|
732
|
+
}
|
733
|
+
fclose(fp);
|
734
|
+
|
735
|
+
/* Write the file header. */
|
736
|
+
header = (header_t*)calloc(1, sizeof(header_t));
|
737
|
+
|
738
|
+
p = model->buffer;
|
739
|
+
p += read_uint8_array(p, header->magic, sizeof(header->magic));
|
740
|
+
p += read_uint32(p, &header->size);
|
741
|
+
p += read_uint8_array(p, header->type, sizeof(header->type));
|
742
|
+
p += read_uint32(p, &header->version);
|
743
|
+
p += read_uint32(p, &header->num_features);
|
744
|
+
p += read_uint32(p, &header->num_labels);
|
745
|
+
p += read_uint32(p, &header->num_attrs);
|
746
|
+
p += read_uint32(p, &header->off_features);
|
747
|
+
p += read_uint32(p, &header->off_labels);
|
748
|
+
p += read_uint32(p, &header->off_attrs);
|
749
|
+
p += read_uint32(p, &header->off_labelrefs);
|
750
|
+
p += read_uint32(p, &header->off_attrrefs);
|
751
|
+
model->header = header;
|
752
|
+
|
753
|
+
model->labels = cqdb_reader(
|
754
|
+
model->buffer + header->off_labels,
|
755
|
+
model->size - header->off_labels
|
756
|
+
);
|
757
|
+
|
758
|
+
model->attrs = cqdb_reader(
|
759
|
+
model->buffer + header->off_attrs,
|
760
|
+
model->size - header->off_attrs
|
761
|
+
);
|
762
|
+
|
763
|
+
return model;
|
764
|
+
|
765
|
+
error_exit:
|
766
|
+
if (model != NULL) {
|
767
|
+
free(model);
|
768
|
+
}
|
769
|
+
if (fp != NULL) {
|
770
|
+
fclose(fp);
|
771
|
+
}
|
772
|
+
return NULL;
|
773
|
+
}
|
774
|
+
|
775
|
+
/*
 * Releases a model created by crf1dm_new(): the CQDB readers (when
 * present), the parsed header, the file image, and the model itself.
 */
void crf1dm_close(crf1dm_t* model)
{
    if (model->labels != NULL) {
        cqdb_delete(model->labels);
    }
    if (model->attrs != NULL) {
        cqdb_delete(model->attrs);
    }

    /* free(NULL) is a no-op, so these need no guards. */
    free(model->header);
    model->header = NULL;

    free(model->buffer_orig);
    model->buffer_orig = NULL;
    model->buffer = NULL;

    free(model);
}
|
793
|
+
|
794
|
+
/* Returns the number of attributes recorded in the model header. */
int crf1dm_get_num_attrs(crf1dm_t* model)
{
    const header_t *hdr = model->header;
    return hdr->num_attrs;
}
|
798
|
+
|
799
|
+
/* Returns the number of labels recorded in the model header. */
int crf1dm_get_num_labels(crf1dm_t* model)
{
    const header_t *hdr = model->header;
    return hdr->num_labels;
}
|
803
|
+
|
804
|
+
/*
 * Looks up the string for label id lid in the label CQDB.
 * Returns NULL when the model has no label CQDB; otherwise returns whatever
 * cqdb_to_string() yields.
 */
const char *crf1dm_to_label(crf1dm_t* model, int lid)
{
    if (model->labels == NULL) {
        return NULL;
    }
    return cqdb_to_string(model->labels, lid);
}
|
812
|
+
|
813
|
+
/*
 * Looks up the id of the label string value in the label CQDB.
 * Returns -1 when the model has no label CQDB; otherwise returns whatever
 * cqdb_to_id() yields.
 */
int crf1dm_to_lid(crf1dm_t* model, const char *value)
{
    if (model->labels == NULL) {
        return -1;
    }
    return cqdb_to_id(model->labels, value);
}
|
821
|
+
|
822
|
+
/*
 * Looks up the id of the attribute string value in the attribute CQDB.
 * Returns -1 when the model has no attribute CQDB; otherwise returns
 * whatever cqdb_to_id() yields.
 */
int crf1dm_to_aid(crf1dm_t* model, const char *value)
{
    if (model->attrs == NULL) {
        return -1;
    }
    return cqdb_to_id(model->attrs, value);
}
|
830
|
+
|
831
|
+
/*
 * Looks up the string for attribute id aid in the attribute CQDB.
 * Returns NULL when the model has no attribute CQDB; otherwise returns
 * whatever cqdb_to_string() yields.
 */
const char *crf1dm_to_attr(crf1dm_t* model, int aid)
{
    if (model->attrs == NULL) {
        return NULL;
    }
    return cqdb_to_string(model->attrs, aid);
}
|
839
|
+
|
840
|
+
/*
 * Fetches the feature references of label lid.
 *
 * Reads the lid-th entry of the offset table that follows the label
 * feature-reference chunk header, then at that file offset reads the
 * feature count into ref->num_features and points ref->fids at the ids
 * stored right after it, inside the model buffer. The ids are in file
 * encoding; decode them with crf1dm_get_featureid().
 *
 * lid is not range-checked. Always returns 0.
 */
int crf1dm_get_labelref(crf1dm_t* model, int lid, feature_refs_t* ref)
{
    uint32_t record = 0;
    uint8_t *slot = model->buffer;
    uint8_t *cursor = NULL;

    /* Locate the lid-th slot of the offset table. */
    slot += model->header->off_labelrefs;
    slot += CHUNK_SIZE;
    slot += sizeof(uint32_t) * lid;
    read_uint32(slot, &record);

    /* The record is a feature count followed by the feature ids. */
    cursor = model->buffer + record;
    cursor += read_uint32(cursor, &ref->num_features);
    ref->fids = (int*)cursor;
    return 0;
}
|
855
|
+
|
856
|
+
/*
 * Fetches the feature references of attribute aid.
 *
 * Reads the aid-th entry of the offset table that follows the attribute
 * feature-reference chunk header, then at that file offset reads the
 * feature count into ref->num_features and points ref->fids at the ids
 * stored right after it, inside the model buffer. The ids are in file
 * encoding; decode them with crf1dm_get_featureid().
 *
 * aid is not range-checked. Always returns 0.
 */
int crf1dm_get_attrref(crf1dm_t* model, int aid, feature_refs_t* ref)
{
    uint32_t record = 0;
    uint8_t *slot = model->buffer;
    uint8_t *cursor = NULL;

    /* Locate the aid-th slot of the offset table. */
    slot += model->header->off_attrrefs;
    slot += CHUNK_SIZE;
    slot += sizeof(uint32_t) * aid;
    read_uint32(slot, &record);

    /* The record is a feature count followed by the feature ids. */
    cursor = model->buffer + record;
    cursor += read_uint32(cursor, &ref->num_features);
    ref->fids = (int*)cursor;
    return 0;
}
|
871
|
+
|
872
|
+
/*
 * Decodes the i-th feature id of a reference list obtained from
 * crf1dm_get_labelref() or crf1dm_get_attrref(). The ids are stored as
 * little-endian 32-bit values, so they are decoded byte by byte.
 */
int crf1dm_get_featureid(feature_refs_t* ref, int i)
{
    uint32_t id = 0;
    uint8_t *entry = (uint8_t*)ref->fids;

    entry += sizeof(uint32_t) * i;
    read_uint32(entry, &id);
    return (int)id;
}
|
880
|
+
|
881
|
+
/*
 * Decodes feature fid from the feature chunk into *f.
 *
 * Feature records are FEATURE_SIZE bytes each and start right after the
 * chunk header; a record holds type, src, dst (uint32 each) and the weight.
 * fid is not range-checked. Always returns 0.
 */
int crf1dm_get_feature(crf1dm_t* model, int fid, crf1dm_feature_t* f)
{
    uint32_t field = 0;
    uint32_t where = model->header->off_features + CHUNK_SIZE;
    uint8_t *cursor = NULL;

    where += FEATURE_SIZE * fid;
    cursor = model->buffer + where;

    cursor += read_uint32(cursor, &field);
    f->type = field;

    cursor += read_uint32(cursor, &field);
    f->src = field;

    cursor += read_uint32(cursor, &field);
    f->dst = field;

    cursor += read_float(cursor, &f->weight);
    return 0;
}
|
897
|
+
|
898
|
+
/*
 * Writes a human-readable dump of the model to fp: the file header, the
 * label and attribute tables, and every feature reachable through the
 * label (transition) and attribute (state) feature references.
 *
 * The header fields and loop counters are uint32_t, so they are printed
 * with unsigned conversions and explicit casts.
 */
void crf1dm_dump(crf1dm_t* crf1dm, FILE *fp)
{
    int j;
    uint32_t i;
    feature_refs_t refs;
    const header_t* hfile = crf1dm->header;

    /* Dump the file header. */
    fprintf(fp, "FILEHEADER = {\n");
    fprintf(fp, " magic: %c%c%c%c\n",
        hfile->magic[0], hfile->magic[1], hfile->magic[2], hfile->magic[3]);
    fprintf(fp, " size: %u\n", (unsigned int)hfile->size);
    fprintf(fp, " type: %c%c%c%c\n",
        hfile->type[0], hfile->type[1], hfile->type[2], hfile->type[3]);
    fprintf(fp, " version: %u\n", (unsigned int)hfile->version);
    fprintf(fp, " num_features: %u\n", (unsigned int)hfile->num_features);
    fprintf(fp, " num_labels: %u\n", (unsigned int)hfile->num_labels);
    fprintf(fp, " num_attrs: %u\n", (unsigned int)hfile->num_attrs);
    fprintf(fp, " off_features: 0x%X\n", (unsigned int)hfile->off_features);
    fprintf(fp, " off_labels: 0x%X\n", (unsigned int)hfile->off_labels);
    fprintf(fp, " off_attrs: 0x%X\n", (unsigned int)hfile->off_attrs);
    fprintf(fp, " off_labelrefs: 0x%X\n", (unsigned int)hfile->off_labelrefs);
    fprintf(fp, " off_attrrefs: 0x%X\n", (unsigned int)hfile->off_attrrefs);
    fprintf(fp, "}\n");
    fprintf(fp, "\n");

    /* Dump the labels. */
    fprintf(fp, "LABELS = {\n");
    for (i = 0;i < hfile->num_labels;++i) {
        const char *str = crf1dm_to_label(crf1dm, i);
#if 0
        int check = crf1dm_to_lid(crf1dm, str);
        if (i != check) {
            fprintf(fp, "WARNING: inconsistent label CQDB\n");
        }
#endif
        fprintf(fp, " %5u: %s\n", (unsigned int)i, str);
    }
    fprintf(fp, "}\n");
    fprintf(fp, "\n");

    /* Dump the attributes. */
    fprintf(fp, "ATTRIBUTES = {\n");
    for (i = 0;i < hfile->num_attrs;++i) {
        const char *str = crf1dm_to_attr(crf1dm, i);
#if 0
        int check = crf1dm_to_aid(crf1dm, str);
        if (i != check) {
            fprintf(fp, "WARNING: inconsistent attribute CQDB\n");
        }
#endif
        fprintf(fp, " %5u: %s\n", (unsigned int)i, str);
    }
    fprintf(fp, "}\n");
    fprintf(fp, "\n");

    /* Dump the transition features. */
    fprintf(fp, "TRANSITIONS = {\n");
    for (i = 0;i < hfile->num_labels;++i) {
        crf1dm_get_labelref(crf1dm, i, &refs);
        for (j = 0;j < refs.num_features;++j) {
            crf1dm_feature_t f;
            int fid = crf1dm_get_featureid(&refs, j);
            const char *from = NULL, *to = NULL;

            crf1dm_get_feature(crf1dm, fid, &f);
            from = crf1dm_to_label(crf1dm, f.src);
            to = crf1dm_to_label(crf1dm, f.dst);
            fprintf(fp, " (%d) %s --> %s: %f\n", f.type, from, to, f.weight);
        }
    }
    fprintf(fp, "}\n");
    fprintf(fp, "\n");

    /* Dump the state features. */
    fprintf(fp, "STATE_FEATURES = {\n");
    for (i = 0;i < hfile->num_attrs;++i) {
        crf1dm_get_attrref(crf1dm, i, &refs);
        for (j = 0;j < refs.num_features;++j) {
            crf1dm_feature_t f;
            int fid = crf1dm_get_featureid(&refs, j);
            const char *attr = NULL, *to = NULL;

            crf1dm_get_feature(crf1dm, fid, &f);
#if 0
            if (f.src != i) {
                fprintf(fp, "WARNING: an inconsistent attribute reference.\n");
            }
#endif
            attr = crf1dm_to_attr(crf1dm, f.src);
            to = crf1dm_to_label(crf1dm, f.dst);
            fprintf(fp, " (%d) %s --> %s: %f\n", f.type, attr, to, f.weight);
        }
    }
    fprintf(fp, "}\n");
    fprintf(fp, "\n");
}
|