ots 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +80 -0
- data/dictionaries/bg.xml +101 -0
- data/dictionaries/ca.xml +141 -0
- data/dictionaries/cs.xml +161 -0
- data/dictionaries/cy.xml +118 -0
- data/dictionaries/da.xml +129 -0
- data/dictionaries/de.xml +354 -0
- data/dictionaries/el.xml +80 -0
- data/dictionaries/en.xml +606 -0
- data/dictionaries/eo.xml +171 -0
- data/dictionaries/es.xml +369 -0
- data/dictionaries/et.xml +172 -0
- data/dictionaries/eu.xml +77 -0
- data/dictionaries/fi.xml +105 -0
- data/dictionaries/fr.xml +199 -0
- data/dictionaries/ga.xml +124 -0
- data/dictionaries/gl.xml +290 -0
- data/dictionaries/he.xml +334 -0
- data/dictionaries/hu.xml +280 -0
- data/dictionaries/ia.xml +97 -0
- data/dictionaries/id.xml +75 -0
- data/dictionaries/is.xml +201 -0
- data/dictionaries/it.xml +206 -0
- data/dictionaries/lv.xml +77 -0
- data/dictionaries/mi.xml +76 -0
- data/dictionaries/ms.xml +160 -0
- data/dictionaries/mt.xml +73 -0
- data/dictionaries/nl.xml +245 -0
- data/dictionaries/nn.xml +264 -0
- data/dictionaries/pl.xml +92 -0
- data/dictionaries/pt.xml +365 -0
- data/dictionaries/ro.xml +163 -0
- data/dictionaries/ru.xml +150 -0
- data/dictionaries/sv.xml +255 -0
- data/dictionaries/tl.xml +67 -0
- data/dictionaries/tr.xml +65 -0
- data/dictionaries/uk.xml +98 -0
- data/dictionaries/yi.xml +293 -0
- data/ext/article.c +119 -0
- data/ext/dictionary.c +335 -0
- data/ext/extconf.rb +13 -14
- data/ext/grader-tc.c +185 -0
- data/ext/grader-tc.h +64 -0
- data/ext/grader-tf.c +116 -0
- data/ext/grader.c +85 -0
- data/ext/highlighter.c +128 -0
- data/ext/html.c +131 -0
- data/ext/libots.h +158 -0
- data/ext/ots.c +130 -151
- data/ext/ots.h +15 -0
- data/ext/parser.c +173 -0
- data/ext/relations.c +163 -0
- data/ext/stemmer.c +332 -0
- data/ext/text.c +98 -0
- data/ext/version.h +2 -0
- data/ext/wordlist.c +220 -0
- data/test/helper.rb +3 -0
- data/test/test_article.rb +52 -0
- data/test/test_ots.rb +23 -0
- metadata +122 -38
- data/README +0 -25
- data/VERSION +0 -1
- data/lib/ots.rb +0 -1
- data/test/ots_test.rb +0 -62
data/ext/extconf.rb
CHANGED
@@ -1,26 +1,25 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
glib_cflags = %x{pkg-config --cflags glib-2.0}.strip
|
4
|
+
glib_ldflags = %x{pkg-config --libs glib-2.0}.strip
|
5
5
|
|
6
|
-
if
|
6
|
+
if glib_cflags.empty?
|
7
7
|
warn %q{WARNING: No pkg-config found for glib-2.0, using defaults. Set GLIB_INCLUDE_DIR env to override.}
|
8
8
|
dirs = ENV.fetch('GLIB_INCLUDE_DIR', '/usr/include/glib-2.0 /usr/lib/glib-2.0/include')
|
9
|
-
|
9
|
+
glib_cflags = dirs.split(/\s+/).map {|dir| "-I#{dir}"}.join(' ')
|
10
10
|
end
|
11
11
|
|
12
|
-
if
|
12
|
+
if glib_ldflags.empty?
|
13
13
|
warn %q{WARNING: No pkg-config found for glib-2.0, using defaults. Set GLIB_LIB env to override.}
|
14
14
|
libs = ENV.fetch('GLIB_LIB', 'glib-2.0')
|
15
|
-
|
15
|
+
glib_ldflags = libs.split(/\s+/).map {|lib| "-l#{lib}"}.join(' ')
|
16
16
|
end
|
17
17
|
|
18
|
-
|
18
|
+
dir = File.expand_path(File.dirname(__FILE__) + '/../dictionaries')
|
19
|
+
$CFLAGS = glib_cflags + %Q{ -I/usr/include/libxml2 -DDICTIONARY_DIR='"#{dir}/"'}
|
20
|
+
$LDFLAGS = glib_ldflags
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
puts "Cannot find libots headers or libraries"
|
25
|
-
exit 1
|
26
|
-
end
|
22
|
+
find_library('glib-2.0', 'main')
|
23
|
+
find_library('xml2', 'main')
|
24
|
+
|
25
|
+
create_makefile 'ots'
|
data/ext/grader-tc.c
ADDED
@@ -0,0 +1,185 @@
|
|
1
|
+
/*
|
2
|
+
* grader-tc.c
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <stdlib.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include "libots.h"
|
25
|
+
|
26
|
+
|
27
|
+
#include "grader-tc.h"
|
28
|
+
|
29
|
+
|
30
|
+
/*Grader - Term count algorithm*/
|
31
|
+
/*This is a non-normalized term frequency algorithm that does not use an inverse document frequency database */
|
32
|
+
|
33
|
+
#define NUM_KEY_WORDS 100 /* use first n key words only */
|
34
|
+
|
35
|
+
int
|
36
|
+
ots_get_article_word_count (const OtsArticle * Doc)
|
37
|
+
{
|
38
|
+
GList *li;
|
39
|
+
int articleWC;
|
40
|
+
articleWC = 0;
|
41
|
+
|
42
|
+
if (Doc==NULL) return 0;
|
43
|
+
|
44
|
+
for (li = (GList *) Doc->lines; li != NULL; li = li->next)
|
45
|
+
{
|
46
|
+
articleWC += ((OtsSentence *) li->data)->wc;
|
47
|
+
}
|
48
|
+
|
49
|
+
return articleWC;
|
50
|
+
}
|
51
|
+
|
52
|
+
|
53
|
+
/*take this line and add each word to the "wordStat" list
|
54
|
+
* this list will hold all of the words in the article and the number
|
55
|
+
* of times they appeared in the article.
|
56
|
+
*/
|
57
|
+
|
58
|
+
static void
|
59
|
+
ots_line_add_wordlist(OtsArticle * Doc,const OtsSentence * aLine)
|
60
|
+
{
|
61
|
+
GList *li;
|
62
|
+
if ((aLine==NULL) ||(NULL==Doc)) { return;}
|
63
|
+
|
64
|
+
for (li = (GList *) aLine->words; li != NULL; li = li->next) /* for each word in the sentence Do: */
|
65
|
+
if (li->data && strlen (li->data)) ots_add_wordstat (Doc, (char *)li->data);
|
66
|
+
|
67
|
+
return;
|
68
|
+
}
|
69
|
+
|
70
|
+
static void
|
71
|
+
ots_create_wordlist(OtsArticle * Doc)
|
72
|
+
{
|
73
|
+
GList *line;
|
74
|
+
if (Doc==NULL) return;
|
75
|
+
|
76
|
+
for (line = (GList *) Doc->lines; line != NULL; line = line->next)
|
77
|
+
{
|
78
|
+
OtsSentence * aLine=line->data;
|
79
|
+
if (aLine)
|
80
|
+
ots_line_add_wordlist(Doc,aLine);
|
81
|
+
}
|
82
|
+
}
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
/*
 * Weight applied to a keyword according to its position n:
 * position 1 counts triple, positions 2 to 4 count double, and every
 * other value of n counts once.
 */
static int
keyVal (const int n)
{
  switch (n)
    {
    case 1:
      return 3;
    case 2:
    case 3:
    case 4:
      return 2;
    default:
      return 1;
    }
}
|
96
|
+
|
97
|
+
|
98
|
+
static void
|
99
|
+
ots_grade_line (GList *impList, OtsSentence * aLine,
|
100
|
+
OtsStemRule * rule)
|
101
|
+
{
|
102
|
+
GList *li;
|
103
|
+
GList *di;
|
104
|
+
int n;
|
105
|
+
char *tmp_stem;
|
106
|
+
|
107
|
+
if ((aLine==NULL)||(rule==NULL)||(impList==NULL)) return;
|
108
|
+
|
109
|
+
for (li = (GList *) aLine->words; li != NULL; li = li->next) /* for each word */
|
110
|
+
{
|
111
|
+
n = 0;
|
112
|
+
tmp_stem = ots_stem_strip ((unsigned char *) li->data, rule);
|
113
|
+
|
114
|
+
for (di = (GList *) impList;
|
115
|
+
((di != NULL) && (n < NUM_KEY_WORDS)); di = di->next)
|
116
|
+
{
|
117
|
+
n++;
|
118
|
+
if ((NULL!=((OtsWordEntery *) di->data)->stem) && (NULL!=tmp_stem))
|
119
|
+
if (0 == strcmp ((((OtsWordEntery *) di->data)->stem), tmp_stem))
|
120
|
+
{
|
121
|
+
/* debug:
|
122
|
+
if (0!=strcmp((((OtsWordEntery *) di->data)->word),li->data))
|
123
|
+
printf("[%s][%s] stem[%s]\n",(((OtsWordEntery *) di->data)->word),li->data,tmp);*/
|
124
|
+
|
125
|
+
aLine->score += (((OtsWordEntery *) di->data)->occ) * keyVal (n);
|
126
|
+
}
|
127
|
+
|
128
|
+
}
|
129
|
+
|
130
|
+
g_free (tmp_stem);
|
131
|
+
}
|
132
|
+
|
133
|
+
}
|
134
|
+
|
135
|
+
|
136
|
+
void
|
137
|
+
ots_create_title_tc(OtsArticle * Doc)
|
138
|
+
{
|
139
|
+
|
140
|
+
char *tmp;
|
141
|
+
char *word;
|
142
|
+
int i;
|
143
|
+
GString *title;
|
144
|
+
if (NULL==Doc) return;
|
145
|
+
|
146
|
+
title=g_string_new(NULL);
|
147
|
+
|
148
|
+
for (i=0;i<5;i++)
|
149
|
+
{
|
150
|
+
word = ots_word_in_list(Doc->ImpWords,i);
|
151
|
+
if (word) g_string_append(title,word); else break;
|
152
|
+
if (i<4) g_string_append(title,",");
|
153
|
+
}
|
154
|
+
|
155
|
+
tmp=title->str;
|
156
|
+
if (NULL!=title) g_string_free(title,FALSE);
|
157
|
+
Doc->title=tmp;
|
158
|
+
}
|
159
|
+
|
160
|
+
|
161
|
+
void
|
162
|
+
ots_grade_doc_tc (OtsArticle * Doc)
|
163
|
+
{
|
164
|
+
|
165
|
+
GList *li;
|
166
|
+
if (NULL==Doc) return;
|
167
|
+
ots_create_wordlist(Doc);
|
168
|
+
|
169
|
+
|
170
|
+
Doc->ImpWords=ots_union_list (Doc->wordStat, Doc->dict); /* subtract from the Article wordlist all the words in the dic file (on , the , is...) */
|
171
|
+
Doc->ImpWords=ots_sort_list (Doc->ImpWords); /* sort the list , top 3 is what the article talks about (SARS , virus , cure ... ) */
|
172
|
+
|
173
|
+
/*to print wordlist: ots_print_wordlist (stdout, Doc->ImpWords);*/
|
174
|
+
|
175
|
+
if (0 == Doc->lineCount) return;
|
176
|
+
|
177
|
+
for (li = (GList *) Doc->lines; li != NULL; li = li->next)
|
178
|
+
{
|
179
|
+
if (li->data)
|
180
|
+
ots_grade_line (Doc->ImpWords, (OtsSentence *) li->data, Doc->stem);
|
181
|
+
}
|
182
|
+
|
183
|
+
|
184
|
+
ots_create_title_tc(Doc);
|
185
|
+
}
|
data/ext/grader-tc.h
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
/*
|
2
|
+
* grader-tc.h
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#ifndef HAVE_GRADERTC_H
|
22
|
+
#define HAVE_GRADERTC_H
|
23
|
+
|
24
|
+
|
25
|
+
#include <glib.h>
|
26
|
+
#include "libots.h"
|
27
|
+
|
28
|
+
G_BEGIN_DECLS
|
29
|
+
|
30
|
+
|
31
|
+
typedef struct
|
32
|
+
{
|
33
|
+
gchar *word; /* the word */
|
34
|
+
gchar *stem; /*stem of the word*/
|
35
|
+
gint occ; /* how many times have we seen this word in the text? */
|
36
|
+
} OtsWordEntery;
|
37
|
+
|
38
|
+
/*Word list manipulations*/
|
39
|
+
void ots_free_wordlist (GList *aList);
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
OtsWordEntery *ots_copy_wordEntery (OtsWordEntery * obj);
|
44
|
+
OtsWordEntery *ots_new_wordEntery (unsigned const char *wordString);
|
45
|
+
OtsWordEntery *ots_new_wordEntery_strip (unsigned const char *wordString,const OtsStemRule *rule);
|
46
|
+
void ots_free_wordEntery (OtsWordEntery * WC);
|
47
|
+
|
48
|
+
GList *ots_sort_list (GList* aList);
|
49
|
+
GList *ots_union_list (const GList *aLst, const GList * bLst);
|
50
|
+
|
51
|
+
char *ots_word_in_list (const GList *aList,const int index);
|
52
|
+
char *ots_stem_in_list (const GList *aList,const int index);
|
53
|
+
void ots_add_wordstat (OtsArticle * Doc,unsigned const char *wordString);
|
54
|
+
|
55
|
+
|
56
|
+
/*grader*/
|
57
|
+
|
58
|
+
void ots_grade_doc_tc (OtsArticle * Doc);
|
59
|
+
|
60
|
+
G_END_DECLS
|
61
|
+
|
62
|
+
|
63
|
+
|
64
|
+
#endif /* HAVE_GRADERTC_H */
|
data/ext/grader-tf.c
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
/*
|
2
|
+
* grader-tf.c
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libots.h"
|
25
|
+
|
26
|
+
/*Grader - using the Term frequency algorithm. Will give each line a score*/
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
OtsWordTF*
|
31
|
+
ots_new_OtsWordTF(const char* word,const double tf)
|
32
|
+
{
|
33
|
+
OtsWordTF* obj=g_new0(OtsWordTF,1);
|
34
|
+
if (word!=NULL) obj->word=g_strdup(word);
|
35
|
+
obj->tf=tf;
|
36
|
+
return obj;
|
37
|
+
}
|
38
|
+
|
39
|
+
void
|
40
|
+
ots_free_OtsWordTF(OtsWordTF *obj)
|
41
|
+
{
|
42
|
+
if (obj!=NULL)
|
43
|
+
{
|
44
|
+
if (obj->word!=NULL) g_free(obj->word);
|
45
|
+
g_free(obj);
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
void
|
50
|
+
ots_free_TF_wordlist (GList * aList)
|
51
|
+
{
|
52
|
+
if (aList != NULL)
|
53
|
+
{
|
54
|
+
g_list_foreach(aList,(GFunc)ots_free_OtsWordTF, NULL);
|
55
|
+
g_list_free(aList);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
|
60
|
+
void
|
61
|
+
ots_grade_line_tf (OtsSentence * aLine)
|
62
|
+
{
|
63
|
+
|
64
|
+
return;
|
65
|
+
}
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
void
|
70
|
+
ots_grade_doc_tf (OtsArticle * Doc)
|
71
|
+
{
|
72
|
+
|
73
|
+
GList *li;
|
74
|
+
|
75
|
+
/*Load tf list*/
|
76
|
+
/*Load idf list*/
|
77
|
+
|
78
|
+
if (0 == Doc->lineCount) return;
|
79
|
+
|
80
|
+
for (li = (GList *) Doc->lines; li != NULL; li = li->next)
|
81
|
+
{
|
82
|
+
ots_grade_line_tf ((OtsSentence *) li->data /* , tf list , idf list*/);
|
83
|
+
}
|
84
|
+
|
85
|
+
return;
|
86
|
+
}
|
87
|
+
|
88
|
+
|
89
|
+
/*
 * Score of a word: the product of its two weighting factors.
 *
 * tf:  how often the word occurs in the document.
 * idf: how rare the word is across the collection.
 */
double
ots_tf_word_score (const double tf,const double idf)
{
  const double score = tf * idf;

  return score;
}
|
97
|
+
|
98
|
+
/*
|
99
|
+
Determine frequency of query words
|
100
|
+
n = (num-of-sentences words appears in)
|
101
|
+
N = (total-number-of-sentences)
|
102
|
+
f = n/N
|
103
|
+
*/
|
104
|
+
|
105
|
+
double
|
106
|
+
ots_calc_idf (const int term_count,const int doc_word_count)
|
107
|
+
{
|
108
|
+
return -log(doc_word_count/term_count);
|
109
|
+
}
|
110
|
+
|
111
|
+
/*
 * Compute 0.5 + 0.5 * (doc_word_count / term_count) using floating-point
 * division; the previous code truncated the ratio to an integer first.
 *
 * term_count:     number of occurrences of the term.
 * doc_word_count: total number of words.
 *
 * Returns 0 when term_count is 0.
 */
double
ots_calc_tf (const int term_count,const int doc_word_count)
{
  if (term_count == 0)
    return 0.0;

  return 0.5 + 0.5 * ((double) doc_word_count / (double) term_count);
}
|
data/ext/grader.c
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
/*
|
2
|
+
* grader.c
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <stdlib.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include "libots.h"
|
25
|
+
|
26
|
+
extern void ots_grade_doc_tc (OtsArticle * Doc);
|
27
|
+
|
28
|
+
/*Grader driver - will call one of the grading algorithm*/
|
29
|
+
|
30
|
+
|
31
|
+
|
32
|
+
void
|
33
|
+
ots_grade_structure (OtsArticle * Doc) /*must be called after the first grader*/
|
34
|
+
{
|
35
|
+
GList *li;
|
36
|
+
GList *first;
|
37
|
+
GList *second;
|
38
|
+
OtsSentence *first_line=NULL;
|
39
|
+
|
40
|
+
first = NULL;
|
41
|
+
second = NULL;
|
42
|
+
|
43
|
+
if (Doc==NULL) return;
|
44
|
+
|
45
|
+
if (Doc->lines!=NULL)
|
46
|
+
first_line= ((OtsSentence *) (Doc->lines->data));
|
47
|
+
if (NULL!=first_line) first_line->score *= 2; /*first line/title is very important so we increase its score */
|
48
|
+
|
49
|
+
/*This loop will *1.6 the score of each line that
|
50
|
+
starts with \n \n , in other words a new paragraph*/
|
51
|
+
|
52
|
+
for (li = (GList *) Doc->lines; li != NULL; li = li->next)
|
53
|
+
{
|
54
|
+
OtsSentence *aLine = (li->data);
|
55
|
+
if (NULL != aLine) /*line is there */
|
56
|
+
{
|
57
|
+
first = aLine->words; /*first word? */
|
58
|
+
if (NULL != first)
|
59
|
+
second = first->next; /*second word? */
|
60
|
+
if ((NULL != first) && (NULL != second)) /*have content? */
|
61
|
+
if (strcmp (first->data, "\n") && strcmp (second->data, "\n")) /*new paragraph? */
|
62
|
+
aLine->score *= 1.6;
|
63
|
+
}
|
64
|
+
|
65
|
+
}
|
66
|
+
|
67
|
+
}
|
68
|
+
|
69
|
+
/**
|
70
|
+
Each grader needs to do:
|
71
|
+
1.give a ->score to each line
|
72
|
+
2.Set the ->title of the document
|
73
|
+
**/
|
74
|
+
|
75
|
+
void
|
76
|
+
ots_grade_doc (OtsArticle * Doc)
|
77
|
+
{
|
78
|
+
|
79
|
+
if (Doc==NULL) return;
|
80
|
+
ots_grade_doc_tc(Doc); /*Term count*/
|
81
|
+
|
82
|
+
/* or ots_grade_doc_fc (Doc); Term Frequency */
|
83
|
+
|
84
|
+
ots_grade_structure (Doc);
|
85
|
+
}
|