summarize 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +11 -0
- data/README.markdown +42 -0
- data/Rakefile +49 -0
- data/ext/summarize/article.c +119 -0
- data/ext/summarize/dic/bg.xml +101 -0
- data/ext/summarize/dic/ca.xml +141 -0
- data/ext/summarize/dic/cs.xml +161 -0
- data/ext/summarize/dic/cy.xml +118 -0
- data/ext/summarize/dic/da.xml +129 -0
- data/ext/summarize/dic/de.xml +354 -0
- data/ext/summarize/dic/el.xml +80 -0
- data/ext/summarize/dic/en.xml +606 -0
- data/ext/summarize/dic/eo.xml +171 -0
- data/ext/summarize/dic/es.xml +369 -0
- data/ext/summarize/dic/et.xml +172 -0
- data/ext/summarize/dic/eu.xml +77 -0
- data/ext/summarize/dic/fi.xml +105 -0
- data/ext/summarize/dic/fr.xml +199 -0
- data/ext/summarize/dic/ga.xml +124 -0
- data/ext/summarize/dic/gl.xml +290 -0
- data/ext/summarize/dic/he.xml +334 -0
- data/ext/summarize/dic/hu.xml +280 -0
- data/ext/summarize/dic/ia.xml +97 -0
- data/ext/summarize/dic/id.xml +75 -0
- data/ext/summarize/dic/is.xml +201 -0
- data/ext/summarize/dic/it.xml +206 -0
- data/ext/summarize/dic/lv.xml +77 -0
- data/ext/summarize/dic/mi.xml +76 -0
- data/ext/summarize/dic/ms.xml +160 -0
- data/ext/summarize/dic/mt.xml +73 -0
- data/ext/summarize/dic/nl.xml +245 -0
- data/ext/summarize/dic/nn.xml +264 -0
- data/ext/summarize/dic/pl.xml +92 -0
- data/ext/summarize/dic/pt.xml +365 -0
- data/ext/summarize/dic/ro.xml +163 -0
- data/ext/summarize/dic/ru.xml +150 -0
- data/ext/summarize/dic/sv.xml +255 -0
- data/ext/summarize/dic/tl.xml +67 -0
- data/ext/summarize/dic/tr.xml +65 -0
- data/ext/summarize/dic/uk.xml +98 -0
- data/ext/summarize/dic/yi.xml +293 -0
- data/ext/summarize/dictionary.c +331 -0
- data/ext/summarize/extconf.rb +6 -0
- data/ext/summarize/grader-tc.c +185 -0
- data/ext/summarize/grader-tc.h +64 -0
- data/ext/summarize/grader-tf.c +116 -0
- data/ext/summarize/grader.c +85 -0
- data/ext/summarize/highlighter.c +128 -0
- data/ext/summarize/html.c +131 -0
- data/ext/summarize/libots.h +158 -0
- data/ext/summarize/parser.c +173 -0
- data/ext/summarize/relations.c +163 -0
- data/ext/summarize/stemmer.c +332 -0
- data/ext/summarize/summarize.c +43 -0
- data/ext/summarize/summarize.h +12 -0
- data/ext/summarize/text.c +98 -0
- data/ext/summarize/wordlist.c +220 -0
- data/lib/summarize.rb +91 -0
- data/lib/summarize/summarize.bundle +0 -0
- data/sample_data/jupiter.txt +15 -0
- data/summarize.gemspec +21 -0
- metadata +140 -0
@@ -0,0 +1,158 @@
|
|
1
|
+
/*
|
2
|
+
* libots.h
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#ifndef HAVE_LIBOTS_H
|
22
|
+
#define HAVE_LIBOTS_H
|
23
|
+
|
24
|
+
#include <glib.h>
|
25
|
+
|
26
|
+
G_BEGIN_DECLS
|
27
|
+
|
28
|
+
/* Term-frequency record: one word together with a floating-point weight. */
typedef struct
{
  char* word;   /* the word this record describes */
  double tf;    /* term-frequency weight; NOTE(review): ots_new_OtsWordTF()
                   names its second argument `idf`, so this field presumably
                   also carries IDF values - confirm against grader-tf.c */
} OtsWordTF;
|
33
|
+
|
34
|
+
|
35
|
+
/*
 * Stemming and sentence-parsing rules.
 * Every member is a GList whose elements are char* strings.
 */
typedef struct
{
  /* a GList of char* */
  GList *RemovePre;   /* (a|b) replace string a with b */
  GList *RemovePost;
  GList *step1_pre;
  GList *step1_post;

  GList *synonyms;
  GList *manual;

  /* Word suffixes consulted by ots_parser_should_break() in parser.c:
     a word ending with a ParserBreak entry ends the current sentence,
     unless it also ends with a ParserDontBreak entry. */
  GList *ParserBreak;
  GList *ParserDontBreak;

  /* to be implemented */
  GList *ReplaceChars;

} OtsStemRule;
|
54
|
+
|
55
|
+
|
56
|
+
/* One sentence (line) of an article. */
typedef struct
{
  GList *words;        /* a GList of words (char*) */
  glong score;         /* score set by the grader */
  gboolean selected;   /* is this sentence selected? */
  gint wc;             /* word count */
  void *user_data;     /* pointer to the original sentence, or maybe a serial number */
} OtsSentence;
|
64
|
+
|
65
|
+
|
66
|
+
/* A parsed document together with the state used by the graders. */
typedef struct
{
  GList *lines;        /* a GList of sentences (presumably OtsSentence* - confirm) */
  gint lineCount;      /* lines in the text */
  char *title;         /* title, auto generated */

  OtsStemRule *stem;   /* stemming & parsing rules */

  /* Term Frequency grader */
  GList *tf_terms;
  GList *idf_terms;

  /* Term Count grader */
  GList *dict;         /* dictionary from xml */
  GList *wordStat;     /* a word list of all words in the article and their occurrences */
  GList *ImpWords;     /* important words - for the term count grader; relations.c
                          reads topics from it by index */

} OtsArticle;
|
86
|
+
|
87
|
+
|
88
|
+
/* Article life cycle */
OtsArticle *ots_new_article (void);
void ots_free_article (OtsArticle *art);

/* Parser */
/* NOTE(review): FILE needs <stdio.h>, but this header only includes <glib.h>;
   includers appear to be expected to include <stdio.h> first - confirm. */
void ots_parse_file (FILE * stream, OtsArticle * Doc);  /* file input */
void ots_parse_stream(const unsigned char *utf8 , size_t len ,OtsArticle *Doc);  /* parse a UTF-8 stream */

OtsSentence *ots_append_line (OtsArticle * Doc);
void ots_append_word (OtsSentence * aLine,unsigned const char *aWord);
void ots_add_wordstat (OtsArticle * Doc,unsigned const char *wordString);


/* Dictionary */
gboolean ots_load_xml_dictionary (OtsArticle * Doc,unsigned const char *name);

int ots_get_article_word_count (const OtsArticle * Doc);


/* Grader */
void ots_highlight_doc (OtsArticle * Doc, int percent);      /* example: 20% */
void ots_highlight_doc_lines (OtsArticle * Doc, int lines);  /* example: 10 lines */
void ots_highlight_doc_words (OtsArticle * Doc, int words);  /* example: 50 words */

void ots_grade_doc (OtsArticle * Doc);

/* OtsWordTF allocation helpers */
void ots_free_OtsWordTF(OtsWordTF *obj);
OtsWordTF* ots_new_OtsWordTF(const char* word,const double idf);


/* HTML output */
void ots_print_HTML (FILE * stream, const OtsArticle * Doc);
unsigned char *ots_get_doc_HTML (const OtsArticle * Doc, size_t * out_len);

/* Text output */
void ots_print_doc (FILE * stream, const OtsArticle * Doc);
unsigned char *ots_get_doc_text (const OtsArticle * Doc, size_t * out_len);


/* Plugin writing */
unsigned char* ots_get_line_text (const OtsSentence *aLine, gboolean only_if_selected, size_t *out_size);
gboolean ots_is_line_selected(const OtsSentence *aLine);

/* Stemming support */
OtsStemRule *new_stem_rule(void);
void free_stem_rule (OtsStemRule *rule);
unsigned char * ots_stem_strip (unsigned const char * aWord, const OtsStemRule *rule);  /* returns a newly allocated string with the root of the word */
unsigned char *ots_stem_format (unsigned const char *aWord, const OtsStemRule * rule);  /* remove leading spaces, commas, colons, etc. */

/* Relations between texts */

/* Returns the number of topics that two blocks of text share. */
int ots_text_relations(
const unsigned char *text1,const unsigned char *lang_code1,
const unsigned char *text2,const unsigned char *lang_code2,const int topic_num);

/* For a given text, returns its topics as one space-separated string. */
char* ots_text_topics(const unsigned char *text,const unsigned char *lang_code,int topic_num);


/* For a given text, returns the list of its stemmed topics. */
GList* ots_text_stem_list(const unsigned char *text,const unsigned char *lang_code,int topic_num);


/* Gives a score for the relation between two lists of topics; similar to the inner product. */
int ots_topic_list_score(const GList *topic_list1,const GList *topic_list2);
|
153
|
+
|
154
|
+
G_END_DECLS
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
#endif /* HAVE_LIBOTS_H */
|
@@ -0,0 +1,173 @@
|
|
1
|
+
/*
|
2
|
+
* parser.c
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <stdlib.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include <strings.h>
|
25
|
+
#include "libots.h"
|
26
|
+
|
27
|
+
#define BUFFER_SIZE (1024*8)
|
28
|
+
|
29
|
+
/*
 * ots_match_post:
 * @aWord: NUL-terminated word to examine (NULL is tolerated).
 * @post:  NUL-terminated suffix to look for (NULL is tolerated).
 *
 * Tests whether @aWord ends with @post, comparing bytes exactly
 * (case-sensitive).
 *
 * Returns: 1 when @aWord ends with @post, 0 otherwise. An empty @post
 * matches every word; a NULL argument never matches.
 */
int
ots_match_post (const char *aWord,const char *post)
{
  size_t wlen, plen;

  if (aWord == NULL || post == NULL)
    return 0;

  /* keep the lengths as size_t: narrowing strlen() to int can wrap */
  wlen = strlen (aWord);
  plen = strlen (post);

  if (plen > wlen)
    return 0;                   /* suffix longer than the word */

  return memcmp (aWord + (wlen - plen), post, plen) == 0;
}
|
46
|
+
|
47
|
+
void
|
48
|
+
ots_parse_file (FILE * stream, OtsArticle * Doc )
|
49
|
+
{
|
50
|
+
unsigned char fread_buffer[BUFFER_SIZE];
|
51
|
+
unsigned char *buffer;
|
52
|
+
size_t nread, total_read, avail_size;
|
53
|
+
|
54
|
+
buffer = g_new0 (unsigned char, BUFFER_SIZE);
|
55
|
+
|
56
|
+
avail_size = BUFFER_SIZE;
|
57
|
+
total_read = nread = 0;
|
58
|
+
while ((nread =
|
59
|
+
fread (fread_buffer, sizeof (unsigned char), sizeof (fread_buffer),
|
60
|
+
stream)) > 0)
|
61
|
+
{
|
62
|
+
if (nread + total_read > avail_size)
|
63
|
+
{
|
64
|
+
avail_size *= 2;
|
65
|
+
buffer = g_renew (unsigned char, buffer, avail_size);
|
66
|
+
}
|
67
|
+
|
68
|
+
strncpy (buffer + total_read, fread_buffer, nread);
|
69
|
+
total_read += nread;
|
70
|
+
}
|
71
|
+
|
72
|
+
ots_parse_stream (buffer, total_read, Doc);
|
73
|
+
g_free (buffer);
|
74
|
+
}
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
|
80
|
+
int
|
81
|
+
ots_parser_should_break(const char *aWord,const OtsStemRule * rule)
|
82
|
+
{
|
83
|
+
GList *li;
|
84
|
+
char *postfix;
|
85
|
+
int toBreak=0;
|
86
|
+
|
87
|
+
for (li = (GList *) rule->ParserBreak; li != NULL; li = li->next)
|
88
|
+
{
|
89
|
+
postfix=li->data;
|
90
|
+
if (ots_match_post (aWord, postfix) )
|
91
|
+
{
|
92
|
+
toBreak=1;
|
93
|
+
break;
|
94
|
+
}
|
95
|
+
|
96
|
+
}
|
97
|
+
|
98
|
+
|
99
|
+
for (li = (GList *) rule->ParserDontBreak; li != NULL; li = li->next)
|
100
|
+
{
|
101
|
+
postfix=li->data;
|
102
|
+
if (ots_match_post (aWord, postfix) )
|
103
|
+
{
|
104
|
+
toBreak=0;
|
105
|
+
break;
|
106
|
+
}
|
107
|
+
|
108
|
+
}
|
109
|
+
return toBreak;
|
110
|
+
}
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
void
|
115
|
+
ots_parse_stream(const unsigned char *utf8, size_t len, OtsArticle * Doc) /*parse the unicode stream */
|
116
|
+
{
|
117
|
+
|
118
|
+
OtsSentence *tmpLine = ots_append_line (Doc);
|
119
|
+
OtsStemRule * rule=Doc->stem;
|
120
|
+
gunichar uc;
|
121
|
+
int index = 0;
|
122
|
+
char *s = (char *) utf8;
|
123
|
+
GString *word_buffer = g_string_new (NULL);
|
124
|
+
|
125
|
+
|
126
|
+
while ((*s) && (index < len))
|
127
|
+
{
|
128
|
+
uc = g_utf8_get_char (s);
|
129
|
+
|
130
|
+
if (!g_unichar_isspace (uc)) /* space is the end of a word */
|
131
|
+
{
|
132
|
+
|
133
|
+
g_string_append_unichar(word_buffer,uc);
|
134
|
+
|
135
|
+
}
|
136
|
+
else
|
137
|
+
{
|
138
|
+
|
139
|
+
if (0<word_buffer->len)
|
140
|
+
{
|
141
|
+
ots_append_word (tmpLine, word_buffer->str);
|
142
|
+
|
143
|
+
if (ots_parser_should_break(word_buffer->str,rule)) {
|
144
|
+
tmpLine = ots_append_line (Doc); /* Add a new Line */
|
145
|
+
}
|
146
|
+
|
147
|
+
g_string_assign (word_buffer, "");
|
148
|
+
|
149
|
+
}
|
150
|
+
|
151
|
+
if (uc=='\n') {ots_append_word (tmpLine,"\n");}
|
152
|
+
else
|
153
|
+
{ots_append_word (tmpLine," ");}
|
154
|
+
|
155
|
+
g_string_assign (word_buffer,"");
|
156
|
+
}
|
157
|
+
|
158
|
+
s = g_utf8_next_char (s);
|
159
|
+
|
160
|
+
index++;
|
161
|
+
}
|
162
|
+
|
163
|
+
|
164
|
+
if (0<word_buffer->len) /*final flush*/
|
165
|
+
{
|
166
|
+
ots_append_word (tmpLine, word_buffer->str);
|
167
|
+
g_string_assign (word_buffer, "");
|
168
|
+
}
|
169
|
+
|
170
|
+
|
171
|
+
|
172
|
+
g_string_free (word_buffer, TRUE);
|
173
|
+
}
|
@@ -0,0 +1,163 @@
|
|
1
|
+
/*
|
2
|
+
* relations.c
|
3
|
+
*
|
4
|
+
* Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
|
5
|
+
*
|
6
|
+
* This program is free software; you can redistribute it and/or modify
|
7
|
+
* it under the terms of the GNU General Public License as published by
|
8
|
+
* the Free Software Foundation; either version 2 of the License, or
|
9
|
+
* (at your option) any later version.
|
10
|
+
*
|
11
|
+
* This program is distributed in the hope that it will be useful,
|
12
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
* GNU Library General Public License for more details.
|
15
|
+
*
|
16
|
+
* You should have received a copy of the GNU General Public License
|
17
|
+
* along with this program; if not, write to the Free Software
|
18
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
19
|
+
*/
|
20
|
+
|
21
|
+
#include <stdio.h>
|
22
|
+
#include <stdlib.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include "grader-tc.h"
|
25
|
+
|
26
|
+
#include "libots.h"
|
27
|
+
/*
  The inner product of two texts is defined as the number of topics they
  share. This set of functions implements that relation using the OTS API.

  Application: measuring the relation between a Slashdot article and a
  comment made on it.

  Usage: ots_text_relations(story,"en",comment,"en",n);
  where n is the maximum number of most-important topics to consider; it is
  safe to give a high number (e.g. 20).

  Returns:
    0 - off topic
    n - number of topics they share
*/
|
41
|
+
|
42
|
+
#define OTS_MAX_TOPIC_WORD_SIZE 256
|
43
|
+
|
44
|
+
/*Returns the number of topics that two blocks of text share*/
|
45
|
+
int ots_text_relations(
|
46
|
+
const unsigned char *text1,const unsigned char *lang_code1,
|
47
|
+
const unsigned char *text2,const unsigned char *lang_code2,const int topic_num)
|
48
|
+
{
|
49
|
+
GList* top1;
|
50
|
+
GList* top2;
|
51
|
+
int score;
|
52
|
+
|
53
|
+
top1=ots_text_stem_list(text1,lang_code1,topic_num);
|
54
|
+
top2=ots_text_stem_list(text2,lang_code2,topic_num);
|
55
|
+
|
56
|
+
score=ots_topic_list_score(top1,top2);
|
57
|
+
|
58
|
+
if (top1){g_list_foreach (top1, (GFunc) g_free, NULL);g_list_free (top1);}
|
59
|
+
if (top2){g_list_foreach (top2, (GFunc) g_free, NULL);g_list_free (top2);}
|
60
|
+
|
61
|
+
return score;
|
62
|
+
}
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
/*For a given text, return the list of the topics*/
|
68
|
+
char* ots_text_topics(
|
69
|
+
const unsigned char *text,const unsigned char *lang_code,int topic_num)
|
70
|
+
{
|
71
|
+
int i;
|
72
|
+
GString *word;
|
73
|
+
unsigned char *str;
|
74
|
+
unsigned char *tmp;
|
75
|
+
OtsArticle *Art;
|
76
|
+
|
77
|
+
if (NULL==text) return NULL;
|
78
|
+
word = g_string_new (NULL);
|
79
|
+
|
80
|
+
Art = ots_new_article ();
|
81
|
+
|
82
|
+
ots_load_xml_dictionary(Art,lang_code); /*Load the dictionary*/
|
83
|
+
if (text!=NULL) ots_parse_stream (text,strlen(text), Art); /* read text , put it in struct Article */
|
84
|
+
ots_grade_doc (Art);
|
85
|
+
|
86
|
+
|
87
|
+
for (i=0;i<=topic_num;i++)
|
88
|
+
{
|
89
|
+
tmp=ots_word_in_list(Art->ImpWords,i);
|
90
|
+
if ((tmp!=NULL)&&(strlen(tmp)>0)) {g_string_append(word,tmp);
|
91
|
+
g_string_append(word," "); }
|
92
|
+
}
|
93
|
+
|
94
|
+
|
95
|
+
str=word->str;
|
96
|
+
g_string_free (word, FALSE);
|
97
|
+
ots_free_article (Art);
|
98
|
+
|
99
|
+
return str;
|
100
|
+
}
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
/*For a given text, return the list of the stemmed topics*/
|
105
|
+
GList* ots_text_stem_list(const unsigned char *text, const unsigned char *lang_code, int topic_num)
|
106
|
+
{
|
107
|
+
int i;
|
108
|
+
GList *topics=NULL;
|
109
|
+
unsigned char *tmp;
|
110
|
+
OtsArticle *Art;
|
111
|
+
|
112
|
+
if (NULL==text) return NULL;
|
113
|
+
|
114
|
+
Art = ots_new_article ();
|
115
|
+
|
116
|
+
ots_load_xml_dictionary(Art,lang_code);
|
117
|
+
if (text!=NULL) ots_parse_stream (text,strlen(text), Art);
|
118
|
+
ots_grade_doc (Art);
|
119
|
+
|
120
|
+
|
121
|
+
for (i=0;i<=topic_num;i++)
|
122
|
+
{
|
123
|
+
tmp=ots_stem_in_list(Art->ImpWords,i);
|
124
|
+
if ((tmp)&&(strlen(tmp)>0))
|
125
|
+
topics=g_list_append(topics,g_strdup(tmp));
|
126
|
+
}
|
127
|
+
|
128
|
+
|
129
|
+
ots_free_article (Art);
|
130
|
+
return topics;
|
131
|
+
}
|
132
|
+
|
133
|
+
/*Gives a score on the relations between two lists of topics; simmilar to the inner product*/
|
134
|
+
int ots_topic_list_score(
|
135
|
+
const GList *topic_list1,
|
136
|
+
const GList *topic_list2)
|
137
|
+
{
|
138
|
+
int count=0;
|
139
|
+
GList *tmplist1;
|
140
|
+
GList *tmplist2;
|
141
|
+
|
142
|
+
if (!(topic_list1)) return 0;
|
143
|
+
if (!(topic_list2)) return 0;
|
144
|
+
|
145
|
+
tmplist1 = g_list_first(topic_list1);
|
146
|
+
while(tmplist1)
|
147
|
+
{
|
148
|
+
tmplist2 = g_list_first(topic_list2);
|
149
|
+
while(tmplist2)
|
150
|
+
{
|
151
|
+
|
152
|
+
if ((tmplist1->data)&&(tmplist2->data)&&(strlen(tmplist2->data)>1))
|
153
|
+
if (0==strncmp(tmplist1->data,tmplist2->data,OTS_MAX_TOPIC_WORD_SIZE))
|
154
|
+
{count++;}
|
155
|
+
|
156
|
+
tmplist2 = g_list_next(tmplist2);
|
157
|
+
}
|
158
|
+
tmplist1 = g_list_next(tmplist1);
|
159
|
+
}
|
160
|
+
|
161
|
+
return count;
|
162
|
+
}
|
163
|
+
|