RubyGems - opener-opinion-detector-basic - Versions diffs - 2.0.0 → 2.0.1 - Mend

opener-opinion-detector-basic 2.0.0 → 2.0.1

Files changed (148) hide show

checksums.yaml +4 -4
data/README.md +2 -0
data/ext/hack/Rakefile +0 -2
data/lib/opener/opinion_detector_basic/version.rb +1 -1
data/opener-opinion-detector-basic.gemspec +0 -1
data/task/compile.rake +1 -1
data/task/requirements.rake +0 -1
metadata +2 -142
data/core/vendor/src/crfsuite/AUTHORS +0 -1
data/core/vendor/src/crfsuite/COPYING +0 -27
data/core/vendor/src/crfsuite/ChangeLog +0 -103
data/core/vendor/src/crfsuite/INSTALL +0 -236
data/core/vendor/src/crfsuite/Makefile.am +0 -19
data/core/vendor/src/crfsuite/Makefile.in +0 -783
data/core/vendor/src/crfsuite/README +0 -183
data/core/vendor/src/crfsuite/aclocal.m4 +0 -9018
data/core/vendor/src/crfsuite/autogen.sh +0 -38
data/core/vendor/src/crfsuite/compile +0 -143
data/core/vendor/src/crfsuite/config.guess +0 -1502
data/core/vendor/src/crfsuite/config.h.in +0 -198
data/core/vendor/src/crfsuite/config.sub +0 -1714
data/core/vendor/src/crfsuite/configure +0 -14273
data/core/vendor/src/crfsuite/configure.in +0 -149
data/core/vendor/src/crfsuite/crfsuite.sln +0 -42
data/core/vendor/src/crfsuite/depcomp +0 -630
data/core/vendor/src/crfsuite/example/chunking.py +0 -49
data/core/vendor/src/crfsuite/example/crfutils.py +0 -179
data/core/vendor/src/crfsuite/example/ner.py +0 -270
data/core/vendor/src/crfsuite/example/pos.py +0 -78
data/core/vendor/src/crfsuite/example/template.py +0 -88
data/core/vendor/src/crfsuite/frontend/Makefile.am +0 -29
data/core/vendor/src/crfsuite/frontend/Makefile.in +0 -640
data/core/vendor/src/crfsuite/frontend/dump.c +0 -116
data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +0 -129
data/core/vendor/src/crfsuite/frontend/iwa.c +0 -273
data/core/vendor/src/crfsuite/frontend/iwa.h +0 -65
data/core/vendor/src/crfsuite/frontend/learn.c +0 -439
data/core/vendor/src/crfsuite/frontend/main.c +0 -137
data/core/vendor/src/crfsuite/frontend/option.c +0 -93
data/core/vendor/src/crfsuite/frontend/option.h +0 -86
data/core/vendor/src/crfsuite/frontend/readdata.h +0 -38
data/core/vendor/src/crfsuite/frontend/reader.c +0 -136
data/core/vendor/src/crfsuite/frontend/tag.c +0 -427
data/core/vendor/src/crfsuite/genbinary.sh.in +0 -15
data/core/vendor/src/crfsuite/include/Makefile.am +0 -11
data/core/vendor/src/crfsuite/include/Makefile.in +0 -461
data/core/vendor/src/crfsuite/include/crfsuite.h +0 -1063
data/core/vendor/src/crfsuite/include/crfsuite.hpp +0 -555
data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +0 -400
data/core/vendor/src/crfsuite/include/os.h +0 -61
data/core/vendor/src/crfsuite/install-sh +0 -520
data/core/vendor/src/crfsuite/lib/cqdb/COPYING +0 -28
data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +0 -21
data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +0 -549
data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +0 -86
data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +0 -524
data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +0 -587
data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +0 -976
data/core/vendor/src/crfsuite/lib/crf/Makefile.am +0 -46
data/core/vendor/src/crfsuite/lib/crf/Makefile.in +0 -721
data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +0 -216
data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +0 -353
data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +0 -705
data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +0 -943
data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +0 -352
data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +0 -994
data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +0 -550
data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +0 -492
data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +0 -236
data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +0 -272
data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +0 -106
data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +0 -118
data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +0 -80
data/core/vendor/src/crfsuite/lib/crf/src/logging.c +0 -91
data/core/vendor/src/crfsuite/lib/crf/src/logging.h +0 -48
data/core/vendor/src/crfsuite/lib/crf/src/params.c +0 -335
data/core/vendor/src/crfsuite/lib/crf/src/params.h +0 -80
data/core/vendor/src/crfsuite/lib/crf/src/quark.c +0 -172
data/core/vendor/src/crfsuite/lib/crf/src/quark.h +0 -46
data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +0 -1107
data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +0 -160
data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +0 -408
data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +0 -242
data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +0 -507
data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +0 -338
data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +0 -435
data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +0 -341
data/core/vendor/src/crfsuite/ltmain.sh +0 -8413
data/core/vendor/src/crfsuite/missing +0 -376
data/core/vendor/src/crfsuite/swig/Makefile.am +0 -13
data/core/vendor/src/crfsuite/swig/Makefile.in +0 -365
data/core/vendor/src/crfsuite/swig/crfsuite.cpp +0 -2
data/core/vendor/src/crfsuite/swig/export.i +0 -32
data/core/vendor/src/crfsuite/swig/python/README +0 -92
data/core/vendor/src/crfsuite/swig/python/crfsuite.py +0 -329
data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +0 -14355
data/core/vendor/src/crfsuite/swig/python/export_wrap.h +0 -63
data/core/vendor/src/crfsuite/swig/python/prepare.sh +0 -9
data/core/vendor/src/crfsuite/swig/python/sample_tag.py +0 -52
data/core/vendor/src/crfsuite/swig/python/sample_train.py +0 -68
data/core/vendor/src/crfsuite/swig/python/setup.py +0 -44
data/core/vendor/src/crfsuite/win32/stdint.h +0 -679
data/core/vendor/src/liblbfgs/AUTHORS +0 -1
data/core/vendor/src/liblbfgs/COPYING +0 -22
data/core/vendor/src/liblbfgs/ChangeLog +0 -120
data/core/vendor/src/liblbfgs/INSTALL +0 -231
data/core/vendor/src/liblbfgs/Makefile.am +0 -10
data/core/vendor/src/liblbfgs/Makefile.in +0 -638
data/core/vendor/src/liblbfgs/NEWS +0 -0
data/core/vendor/src/liblbfgs/README +0 -71
data/core/vendor/src/liblbfgs/aclocal.m4 +0 -6985
data/core/vendor/src/liblbfgs/autogen.sh +0 -38
data/core/vendor/src/liblbfgs/config.guess +0 -1411
data/core/vendor/src/liblbfgs/config.h.in +0 -64
data/core/vendor/src/liblbfgs/config.sub +0 -1500
data/core/vendor/src/liblbfgs/configure +0 -21146
data/core/vendor/src/liblbfgs/configure.in +0 -107
data/core/vendor/src/liblbfgs/depcomp +0 -522
data/core/vendor/src/liblbfgs/include/lbfgs.h +0 -745
data/core/vendor/src/liblbfgs/install-sh +0 -322
data/core/vendor/src/liblbfgs/lbfgs.sln +0 -26
data/core/vendor/src/liblbfgs/lib/Makefile.am +0 -24
data/core/vendor/src/liblbfgs/lib/Makefile.in +0 -499
data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +0 -133
data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +0 -294
data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +0 -298
data/core/vendor/src/liblbfgs/lib/lbfgs.c +0 -1371
data/core/vendor/src/liblbfgs/lib/lib.vcxproj +0 -95
data/core/vendor/src/liblbfgs/ltmain.sh +0 -6426
data/core/vendor/src/liblbfgs/missing +0 -353
data/core/vendor/src/liblbfgs/sample/Makefile.am +0 -15
data/core/vendor/src/liblbfgs/sample/Makefile.in +0 -433
data/core/vendor/src/liblbfgs/sample/sample.c +0 -81
data/core/vendor/src/liblbfgs/sample/sample.cpp +0 -126
data/core/vendor/src/liblbfgs/sample/sample.vcxproj +0 -105
data/core/vendor/src/svm_light/LICENSE.txt +0 -59
data/core/vendor/src/svm_light/Makefile +0 -105
data/core/vendor/src/svm_light/kernel.h +0 -40
data/core/vendor/src/svm_light/svm_classify.c +0 -197
data/core/vendor/src/svm_light/svm_common.c +0 -985
data/core/vendor/src/svm_light/svm_common.h +0 -301
data/core/vendor/src/svm_light/svm_hideo.c +0 -1062
data/core/vendor/src/svm_light/svm_learn.c +0 -4147
data/core/vendor/src/svm_light/svm_learn.h +0 -169
data/core/vendor/src/svm_light/svm_learn_main.c +0 -397
data/core/vendor/src/svm_light/svm_loqo.c +0 -211
data/task/c.rake +0 -36
data/task/submodules.rake +0 -5

data/core/vendor/src/crfsuite/example/crfutils.py DELETED Viewed

@@ -1,179 +0,0 @@
-"""
-A miscellaneous utility for sequential labeling.
-Copyright 2010,2011 Naoaki Okazaki.
-"""
-import optparse
-import sys
-def apply_templates(X, templates):
-    """
-    Generate features for an item sequence by applying feature templates.
-    A feature template consists of a tuple of (name, offset) pairs,
-    where name and offset specify a field name and offset from which
-    the template extracts a feature value. Generated features are stored
-    in the 'F' field of each item in the sequence.
-    @type   X:      list of mapping objects
-    @param  X:      The item sequence.
-    @type   template:   tuple of (str, int)
-    @param  template:   The feature template.
-    """
-    for template in templates:
-        name = '|'.join(['%s[%d]' % (f, o) for f, o in template])
-        for t in range(len(X)):
-            values = []
-            for field, offset in template:
-                p = t + offset
-                if p not in range(len(X)):
-                    values = []
-                    break
-                values.append(X[p][field])
-            if values:
-                X[t]['F'].append('%s=%s' % (name, '|'.join(values)))
-def readiter(fi, names, sep=' '):
-    """
-    Return an iterator for item sequences read from a file object.
-    This function reads a sequence from a file object L{fi}, and
-    yields the sequence as a list of mapping objects. Each line
-    (item) from the file object is split by the separator character
-    L{sep}. Separated values of the item are named by L{names},
-    and stored in a mapping object. Every item has a field 'F' that
-    is reserved for storing features.
-    @type   fi:     file
-    @param  fi:     The file object.
-    @type   names:  tuple
-    @param  names:  The list of field names.
-    @type   sep:    str
-    @param  sep:    The separator character.
-    @rtype          list of mapping objects
-    @return         An iterator for sequences.
-    """
-    X = []
-    for line in fi:
-        line = line.strip('\n')
-        if not line:
-            yield X
-            X = []
-        else:
-            fields = line.split(sep)
-            if len(fields) < len(names):
-                raise ValueError(
-                    'Too few fields (%d) for %r\n%s' % (len(fields), names, line))
-            item = {'F': []}    # 'F' is reserved for features.
-            for i in range(len(names)):
-                item[names[i]] = fields[i]
-            X.append(item)
-def escape(src):
-    """
-    Escape colon characters from feature names.
-    @type   src:    str
-    @param  src:    A feature name
-    @rtype          str
-    @return         The feature name escaped.
-    """
-    return src.replace(':', '__COLON__')
-def output_features(fo, X, field=''):
-    """
-    Output features (and reference labels) of a sequence in CRFSuite
-    format. For each item in the sequence, this function writes a
-    reference label (if L{field} is a non-empty string) and features.
-    @type   fo:     file
-    @param  fo:     The file object.
-    @type   X:      list of mapping objects
-    @param  X:      The sequence.
-    @type   field:  str
-    @param  field:  The field name of reference labels.
-    """
-    for t in range(len(X)):
-        if field:
-            fo.write('%s' % X[t][field])
-        for a in X[t]['F']:
-            if isinstance(a, str):
-                fo.write('\t%s' % escape(a))
-            else:
-                fo.write('\t%s:%f' % (escape(a[0]), a[1]))
-        fo.write('\n')
-    fo.write('\n')
-def to_crfsuite(X):
-    """
-    Convert an item sequence into an object compatible with crfsuite
-    Python module.
-    @type   X:      list of mapping objects
-    @param  X:      The sequence.
-    @rtype          crfsuite.ItemSequence
-    @return        The same sequence in crfsuite.ItemSequence type.
-    """
-    import crfsuite
-    xseq = crfsuite.ItemSequence()
-    for x in X:
-        item = crfsuite.Item()
-        for f in x['F']:
-            if isinstance(f, str):
-                item.append(crfsuite.Attribute(escape(f)))
-            else:
-                item.append(crfsuite.Attribute(escape(f[0]), f[1]))
-        xseq.append(item)
-    return xseq
-def main(feature_extractor, fields='w pos y', sep=' '):
-    fi = sys.stdin
-    fo = sys.stdout
-    # Parse the command-line arguments.
-    parser = optparse.OptionParser(usage="""usage: %prog [options]
-This utility reads a data set from STDIN, and outputs attributes to STDOUT.
-Each line of a data set must consist of field values separated by SEPARATOR
-characters. The names and order of field values can be specified by -f option.
-The separator character can be specified with -s option. Instead of outputting
-attributes, this utility tags the input data when a model file is specified by
--t option (CRFsuite Python module must be installed)."""
-        )
-    parser.add_option(
-        '-t', dest='model',
-        help='tag the input using the model (requires "crfsuite" module)'
-        )
-    parser.add_option(
-        '-f', dest='fields', default=fields,
-        help='specify field names of input data [default: "%default"]'
-        )
-    parser.add_option(
-        '-s', dest='separator', default=sep,
-        help='specify the separator of columns of input data [default: "%default"]'
-        )
-    (options, args) = parser.parse_args()
-    # The fields of input: ('w', 'pos', 'y) by default.
-    F = options.fields.split(' ')
-    if not options.model:
-        # The generator function readiter() reads a sequence from a
-        for X in readiter(fi, F, options.separator):
-            feature_extractor(X)
-            output_features(fo, X, 'y')
-    else:
-        # Create a tagger with an existing model.
-        import crfsuite
-        tagger = crfsuite.Tagger()
-        tagger.open(options.model)
-        # For each sequence from STDIN.
-        for X in readiter(fi, F, options.separator):
-            # Obtain features.
-            feature_extractor(X)
-            xseq = to_crfsuite(X)
-            yseq = tagger.tag(xseq)
-            for t in range(len(X)):
-                v = X[t]
-                fo.write('\t'.join([v[f] for f in F]))
-                fo.write('\t%s\n' % yseq[t])
-            fo.write('\n')

data/core/vendor/src/crfsuite/example/ner.py DELETED Viewed

@@ -1,270 +0,0 @@
-#!/usr/bin/env python
-"""
-A feature extractor for named eneity recognition (NER).
-Copyright 2010,2011 Naoaki Okazaki.
-"""
-# Separator of field values.
-separator = ' '
-# Field names of the input data.
-fields = 'y w pos chk'
-import crfutils
-def get_shape(token):
-    r = ''
-    for c in token:
-        if c.isupper():
-            r += 'U'
-        elif c.islower():
-            r += 'L'
-        elif c.isdigit():
-            r += 'D'
-        elif c in ('.', ','):
-            r += '.'
-        elif c in (';', ':', '?', '!'):
-            r += ';'
-        elif c in ('+', '-', '*', '/', '=', '|', '_'):
-            r += '-'
-        elif c in ('(', '{', '[', '<'):
-            r += '('
-        elif c in (')', '}', ']', '>'):
-            r += ')'
-        else:
-            r += c
-    return r
-def degenerate(src):
-    dst = ''
-    for c in src:
-        if not dst or dst[-1] != c:
-            dst += c
-    return dst
-def get_type(token):
-    T = (
-        'AllUpper', 'AllDigit', 'AllSymbol',
-        'AllUpperDigit', 'AllUpperSymbol', 'AllDigitSymbol',
-        'AllUpperDigitSymbol',
-        'InitUpper',
-        'AllLetter',
-        'AllAlnum',
-        )
-    R = set(T)
-    if not token:
-        return 'EMPTY'
-    for i in range(len(token)):
-        c = token[i]
-        if c.isupper():
-            R.discard('AllDigit')
-            R.discard('AllSymbol')
-            R.discard('AllDigitSymbol')
-        elif c.isdigit() or c in (',', '.'):
-            R.discard('AllUpper')
-            R.discard('AllSymbol')
-            R.discard('AllUpperSymbol')
-            R.discard('AllLetter')
-        elif c.islower():
-            R.discard('AllUpper')
-            R.discard('AllDigit')
-            R.discard('AllSymbol')
-            R.discard('AllUpperDigit')
-            R.discard('AllUpperSymbol')
-            R.discard('AllDigitSymbol')
-            R.discard('AllUpperDigitSymbol')
-        else:
-            R.discard('AllUpper')
-            R.discard('AllDigit')
-            R.discard('AllUpperDigit')
-            R.discard('AllLetter')
-            R.discard('AllAlnum')
-        if i == 0 and not c.isupper():
-            R.discard('InitUpper')
-    for tag in T:
-        if tag in R:
-            return tag
-    return 'NO'
-def get_2d(token):
-    return len(token) == 2 and token.isdigit()
-def get_4d(token):
-    return len(token) == 4 and token.isdigit()
-def get_da(token):
-    bd = False
-    ba = False
-    for c in token:
-        if c.isdigit():
-            bd = True
-        elif c.isalpha():
-            ba = True
-        else:
-            return False
-    return bd and ba
-def get_dand(token, p):
-    bd = False
-    bdd = False
-    for c in token:
-        if c.isdigit():
-            bd = True
-        elif c == p:
-            bdd = True
-        else:
-            return False
-    return bd and bdd
-def get_all_other(token):
-    for c in token:
-        if c.isalnum():
-            return False
-    return True
-def get_capperiod(token):
-    return len(token) == 2 and token[0].isupper() and token[1] == '.'
-def contains_upper(token):
-    b = False
-    for c in token:
-        b |= c.isupper()
-    return b
-def contains_lower(token):
-    b = False
-    for c in token:
-        b |= c.islower()
-    return b
-def contains_alpha(token):
-    b = False
-    for c in token:
-        b |= c.isalpha()
-    return b
-def contains_digit(token):
-    b = False
-    for c in token:
-        b |= c.isdigit()
-    return b
-def contains_symbol(token):
-    b = False
-    for c in token:
-        b |= ~c.isalnum()
-    return b
-def b(v):
-    return 'yes' if v else 'no'
-def observation(v, defval=''):
-    # Lowercased token.
-    v['wl'] = v['w'].lower()
-    # Token shape.
-    v['shape'] = get_shape(v['w'])
-    # Token shape degenerated.
-    v['shaped'] = degenerate(v['shape'])
-    # Token type.
-    v['type'] = get_type(v['w'])
-    # Prefixes (length between one to four).
-    v['p1'] = v['w'][0] if len(v['w']) >= 1 else defval
-    v['p2'] = v['w'][:2] if len(v['w']) >= 2 else defval
-    v['p3'] = v['w'][:3] if len(v['w']) >= 3 else defval
-    v['p4'] = v['w'][:4] if len(v['w']) >= 4 else defval
-    # Suffixes (length between one to four).
-    v['s1'] = v['w'][-1] if len(v['w']) >= 1 else defval
-    v['s2'] = v['w'][-2:] if len(v['w']) >= 2 else defval
-    v['s3'] = v['w'][-3:] if len(v['w']) >= 3 else defval
-    v['s4'] = v['w'][-4:] if len(v['w']) >= 4 else defval
-    # Two digits
-    v['2d'] = b(get_2d(v['w']))
-    # Four digits.
-    v['4d'] = b(get_4d(v['w']))
-    # Alphanumeric token.
-    v['d&a'] = b(get_da(v['w']))
-    # Digits and '-'.
-    v['d&-'] = b(get_dand(v['w'], '-'))
-    # Digits and '/'.
-    v['d&/'] = b(get_dand(v['w'], '/'))
-    # Digits and ','.
-    v['d&,'] = b(get_dand(v['w'], ','))
-    # Digits and '.'.
-    v['d&.'] = b(get_dand(v['w'], '.'))
-    # A uppercase letter followed by '.'
-    v['up'] = b(get_capperiod(v['w']))
-    # An initial uppercase letter.
-    v['iu'] = b(v['w'] and v['w'][0].isupper())
-    # All uppercase letters.
-    v['au'] = b(v['w'].isupper())
-    # All lowercase letters.
-    v['al'] = b(v['w'].islower())
-    # All digit letters.
-    v['ad'] = b(v['w'].isdigit())
-    # All other (non-alphanumeric) letters.
-    v['ao'] = b(get_all_other(v['w']))
-    # Contains a uppercase letter.
-    v['cu'] = b(contains_upper(v['w']))
-    # Contains a lowercase letter.
-    v['cl'] = b(contains_lower(v['w']))
-    # Contains a alphabet letter.
-    v['ca'] = b(contains_alpha(v['w']))
-    # Contains a digit.
-    v['cd'] = b(contains_digit(v['w']))
-    # Contains a symbol.
-    v['cs'] = b(contains_symbol(v['w']))
-def disjunctive(X, t, field, begin, end):
-    name = '%s[%d..%d]' % (field, begin, end)
-    for offset in range(begin, end+1):
-        p = t + offset
-        if p not in range(0, len(X)):
-            continue
-        X[t]['F'].append('%s=%s' % (name, X[p][field]))
-U = [
-    'w', 'wl', 'pos', 'chk', 'shape', 'shaped', 'type',
-    'p1', 'p2', 'p3', 'p4',
-    's1', 's2', 's3', 's4',
-    '2d', '4d', 'd&a', 'd&-', 'd&/', 'd&,', 'd&.', 'up',
-    'iu', 'au', 'al', 'ad', 'ao',
-    'cu', 'cl', 'ca', 'cd', 'cs',
-    ]
-B = ['w', 'pos', 'chk', 'shaped', 'type']
-templates = []
-for name in U:
-    templates += [((name, i),) for i in range(-2, 3)]
-for name in B:
-    templates += [((name, i), (name, i+1)) for i in range(-2, 2)]
-def feature_extractor(X):
-    # Append observations.
-    for x in X:
-        observation(x)
-    # Apply the feature templates.
-    crfutils.apply_templates(X, templates)
-    # Append disjunctive features.
-    for t in range(len(X)):
-        disjunctive(X, t, 'w', -4, -1)
-        disjunctive(X, t, 'w', 1, 4)
-    # Append BOS and EOS features.
-    if X:
-        X[0]['F'].append('__BOS__')
-        X[-1]['F'].append('__EOS__')
-if __name__ == '__main__':
-    crfutils.main(feature_extractor, fields=fields, sep=separator)

data/core/vendor/src/crfsuite/example/pos.py DELETED Viewed

@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-"""
-An example for part-of-speech tagging.
-Copyright 2010,2011 Naoaki Okazaki.
-"""
-# Separator of field values.
-separator = ' '
-# Field names of the input data.
-fields = 'w num cap sym p1 p2 p3 p4 s1 s2 s3 s4 y'
-# Feature template. This template is identical to the one bundled in CRF++
-# distribution, but written in a Python object.
-templates = (
-    (('num', 0), ),
-    (('cap', 0), ),
-    (('sym', 0), ),
-    (('p1', 0), ),
-    (('p2', 0), ),
-    (('p3', 0), ),
-    (('p4', 0), ),
-    (('s1', 0), ),
-    (('s2', 0), ),
-    (('s3', 0), ),
-    (('s4', 0), ),
-    (('w',  0), ),
-    (('w', -1), ),
-    (('w',  1), ),
-    (('w', -2), ),
-    (('w',  2), ),
-    (('w', -2), ('w',  -1)),
-    (('w', -1), ('w',  0)),
-    (('w',  0), ('w',  1)),
-    (('w',  1), ('w',  2)),
-    (('w', -2), ('w',  -1), ('w',  0)),
-    (('w', -1), ('w',  0), ('w',  1)),
-    (('w', 0), ('w',  1), ('w',  2)),
-    (('w', -2), ('w',  -1), ('w',  0), ('w',  1)),
-    (('w',  -1), ('w',  0), ('w',  1), ('w', 2)),
-    (('w', -2), ('w',  -1), ('w',  0), ('w',  1), ('w',  2)),
-    (('w',  0), ('w',  -1)),
-    (('w',  0), ('w',  -2)),
-    (('w',  0), ('w',  -3)),
-    (('w',  0), ('w',  -4)),
-    (('w',  0), ('w',  -5)),
-    (('w',  0), ('w',  -6)),
-    (('w',  0), ('w',  -7)),
-    (('w',  0), ('w',  -8)),
-    (('w',  0), ('w',  -9)),
-    (('w',  0), ('w',  1)),
-    (('w',  0), ('w',  2)),
-    (('w',  0), ('w',  3)),
-    (('w',  0), ('w',  4)),
-    (('w',  0), ('w',  5)),
-    (('w',  0), ('w',  6)),
-    (('w',  0), ('w',  7)),
-    (('w',  0), ('w',  8)),
-    (('w',  0), ('w',  9)),
-    )
-import crfutils
-def feature_extractor(X):
-    # Apply feature templates to obtain features (in fact, attributes)
-    crfutils.apply_templates(X, templates)
-    if X:
-	# Append BOS and EOS features manually
-        X[0]['F'].append('__BOS__')     # BOS feature
-        X[-1]['F'].append('__EOS__')    # EOS feature
-if __name__ == '__main__':
-    crfutils.main(feature_extractor, fields=fields, sep=separator)

data/core/vendor/src/crfsuite/example/template.py DELETED Viewed

@@ -1,88 +0,0 @@
-#!/usr/bin/env python
-import re
-import sys
-class FeatureExtractor:
-    def __init__(self):
-        self.macro = re.compile(r'%x\[(?P<row>[\d-]+),(?P<col>[\d]+)\]')
-        self.inst = []
-        self.t = 0
-        self.templates = []
-    def read(self, fi):
-        self.templates = []
-        for line in fi:
-            line = line.strip()
-            if line.startswith('#'):
-                continue
-            if line.startswith('U'):
-                self.templates.append(line.replace(':', '='))
-            elif line == 'B':
-                continue
-            elif line.startswith('B'):
-                sys.stderr(
-                    'ERROR: bigram templates not supported: %s\n' % line)
-                sys.exit(1)
-    def replace(self, m):
-        row = self.t + int(m.group('row'))
-        col = int(m.group('col'))
-        if row in range(0, len(self.inst)):
-            return self.inst[row]['x'][col]
-        else:
-            return ''
-    def apply(self, inst, t):
-	self.inst = inst
-	self.t = t
-        for template in self.templates:
-            f = re.sub(self.macro, self.replace, template)
-            self.inst[t]['F'].append(f)
-def readiter(fi, sep=None):
-    X = []
-    for line in fi:
-        line = line.strip('\n')
-        if not line:
-            yield X
-            X = []
-        else:
-            fields = line.split(sep)
-            item = {
-                'x': fields[0:-1],
-                'y': fields[-1],
-                'F': []
-                }
-            X.append(item)
-if __name__ == '__main__':
-    import optparse
-    fi = sys.stdin
-    fo = sys.stdout
-    # Parse the command-line arguments.
-    parser = optparse.OptionParser(usage="""usage: %prog <template>
-This utility reads a data set from STDIN, applies feature templates compatible
-with CRF++, and outputs attributes to STDOUT. Each line of a data set must
-consist of field values separated by SEPARATOR characters (customizable with
--s option)."""
-        )
-    parser.add_option(
-        '-s', dest='separator', default='\t',
-        help='specify the separator of columns of input data [default: "\\t"]'
-        )
-    (options, args) = parser.parse_args()
-    F = FeatureExtractor()
-    F.read(open(args[0]))
-    for inst in readiter(fi, options.separator):
-        for t in range(len(inst)):
-            F.apply(inst, t)
-            fo.write('%s' % inst[t]['y'])
-            for attr in inst[t]['F']:
-                fo.write('\t%s' % attr.replace(':', '__COLON__'))
-            fo.write('\n')
-        fo.write('\n')

data/core/vendor/src/crfsuite/frontend/Makefile.am DELETED Viewed

@@ -1,29 +0,0 @@
-# $Id:$
-bin_PROGRAMS = crfsuite
-#man_MANS = crfsuite.1
-#EXTRA_DIST = ${man_MANS}
-EXTRA_DIST = \
-	frontend.vcxproj
-crfsuite_SOURCES = \
-	iwa.h \
-	iwa.c \
-	option.h \
-	option.c \
-	readdata.h \
-	reader.c \
-	learn.c \
-	tag.c \
-	dump.c \
-	main.c
-#crfsuite_CPPFLAGS =
-AM_CFLAGS = @CFLAGS@
-INCLUDES = @INCLUDES@
-AM_LDFLAGS = @LDFLAGS@
-crfsuite_CFLAGS = -I$(top_builddir)/include
-crfsuite_LDADD = $(top_builddir)/lib/crf/libcrfsuite.la